summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-07-11 12:57:57 -0700
committerJakub Kicinski <kuba@kernel.org>2024-07-11 12:58:13 -0700
commit7c8267275de6989a9b682a07d75e89395457ee01 (patch)
treedb28c44520d9f786a4142871b42eb80f0d205ddd
parenta6a9fcb10836105e525ccb8bc1a6af4b20a113be (diff)
parent51df8e0cbaefd432f7029dde94e6c7e4e5b19465 (diff)
downloadlwn-7c8267275de6989a9b682a07d75e89395457ee01.tar.gz
lwn-7c8267275de6989a9b682a07d75e89395457ee01.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. Conflicts: net/sched/act_ct.c 26488172b029 ("net/sched: Fix UAF when resolving a clash") 3abbd7ed8b76 ("act_ct: prepare for stolen verdict coming from conntrack and nat engine") No adjacent changes. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--.mailmap1
-rw-r--r--CREDITS4
-rw-r--r--Documentation/arch/riscv/cmodx.rst4
-rw-r--r--Documentation/networking/devlink/devlink-region.rst2
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c7
-rw-r--r--arch/powerpc/kernel/head_64.S5
-rw-r--r--arch/powerpc/kexec/core_64.c11
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c8
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c5
-rw-r--r--arch/riscv/kernel/machine_kexec.c10
-rw-r--r--arch/riscv/kernel/stacktrace.c3
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c2
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/kvm-s390.h15
-rw-r--r--arch/s390/kvm/priv.c32
-rw-r--r--arch/s390/mm/pgalloc.c4
-rw-r--r--arch/xtensa/include/asm/current.h2
-rw-r--r--arch/xtensa/include/asm/thread_info.h2
-rw-r--r--drivers/acpi/processor_idle.c37
-rw-r--r--drivers/char/tpm/Makefile2
-rw-r--r--drivers/char/tpm/tpm2-sessions.c419
-rw-r--r--drivers/clk/mediatek/clk-mt8183-mfgcfg.c1
-rw-r--r--drivers/clk/mediatek/clk-mtk.c24
-rw-r--r--drivers/clk/mediatek/clk-mtk.h2
-rw-r--r--drivers/clk/qcom/apss-ipq-pll.c2
-rw-r--r--drivers/clk/qcom/clk-alpha-pll.c3
-rw-r--r--drivers/clk/qcom/gcc-ipq9574.c10
-rw-r--r--drivers/clk/qcom/gcc-sm6350.c10
-rw-r--r--drivers/clk/sunxi-ng/ccu_common.c18
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c4
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/firmware/sysfb.c12
-rw-r--r--drivers/gpio/gpio-mmio.c2
-rw-r--r--drivers/gpio/gpiolib-of.c22
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c2
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h2
-rw-r--r--drivers/gpu/drm/drm_fbdev_generic.c3
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c3
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c6
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c44
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c6
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c8
-rw-r--r--drivers/i2c/busses/i2c-pnx.c48
-rw-r--r--drivers/net/dsa/lan9303-core.c23
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c15
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c6
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c73
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c9
-rw-r--r--drivers/net/ethernet/lantiq_etop.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c7
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c10
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c4
-rw-r--r--drivers/net/phy/microchip_t1.c2
-rw-r--r--drivers/net/ppp/ppp_generic.c15
-rw-r--r--drivers/net/wireguard/allowedips.c4
-rw-r--r--drivers/net/wireguard/queueing.h4
-rw-r--r--drivers/net/wireguard/send.c2
-rw-r--r--drivers/of/irq.c18
-rw-r--r--drivers/perf/riscv_pmu.c2
-rw-r--r--drivers/perf/riscv_pmu_sbi.c44
-rw-r--r--drivers/platform/x86/toshiba_acpi.c1
-rw-r--r--drivers/scsi/sd.c5
-rw-r--r--drivers/thermal/gov_power_allocator.c3
-rw-r--r--drivers/thermal/thermal_core.c15
-rw-r--r--drivers/thermal/thermal_core.h6
-rw-r--r--drivers/ufs/core/ufs-mcq.c11
-rw-r--r--drivers/ufs/core/ufshcd.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c2
-rw-r--r--fs/bcachefs/alloc_background.c48
-rw-r--r--fs/bcachefs/alloc_foreground.c2
-rw-r--r--fs/bcachefs/backpointers.c70
-rw-r--r--fs/bcachefs/bkey.c5
-rw-r--r--fs/bcachefs/bkey.h7
-rw-r--r--fs/bcachefs/btree_gc.c24
-rw-r--r--fs/bcachefs/btree_write_buffer.c37
-rw-r--r--fs/bcachefs/btree_write_buffer.h3
-rw-r--r--fs/bcachefs/clock.c7
-rw-r--r--fs/bcachefs/data_update.c44
-rw-r--r--fs/bcachefs/data_update.h5
-rw-r--r--fs/bcachefs/debug.c12
-rw-r--r--fs/bcachefs/eytzinger.h6
-rw-r--r--fs/bcachefs/fs.c11
-rw-r--r--fs/bcachefs/io_read.c4
-rw-r--r--fs/bcachefs/journal.c18
-rw-r--r--fs/bcachefs/journal.h2
-rw-r--r--fs/bcachefs/journal_io.c12
-rw-r--r--fs/bcachefs/lru.c39
-rw-r--r--fs/bcachefs/lru.h3
-rw-r--r--fs/bcachefs/move.c25
-rw-r--r--fs/bcachefs/sb-errors_format.h3
-rw-r--r--fs/bcachefs/super.c11
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/btrfs/qgroup.c10
-rw-r--r--fs/btrfs/ref-verify.c9
-rw-r--r--fs/btrfs/space-info.c24
-rw-r--r--fs/cachefiles/cache.c45
-rw-r--r--fs/cachefiles/daemon.c4
-rw-r--r--fs/cachefiles/internal.h3
-rw-r--r--fs/cachefiles/ondemand.c52
-rw-r--r--fs/cachefiles/volume.c1
-rw-r--r--fs/cachefiles/xattr.c5
-rw-r--r--fs/dcache.c12
-rw-r--r--fs/hfsplus/xattr.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/minix/namei.c3
-rw-r--r--fs/netfs/buffered_read.c14
-rw-r--r--fs/netfs/buffered_write.c12
-rw-r--r--fs/netfs/direct_read.c2
-rw-r--r--fs/netfs/direct_write.c8
-rw-r--r--fs/netfs/fscache_cache.c4
-rw-r--r--fs/netfs/fscache_cookie.c28
-rw-r--r--fs/netfs/fscache_io.c12
-rw-r--r--fs/netfs/fscache_main.c2
-rw-r--r--fs/netfs/fscache_volume.c18
-rw-r--r--fs/netfs/internal.h35
-rw-r--r--fs/netfs/io.c12
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/netfs/misc.c4
-rw-r--r--fs/netfs/write_collect.c16
-rw-r--r--fs/netfs/write_issue.c36
-rw-r--r--fs/nilfs2/dir.c32
-rw-r--r--fs/smb/client/file.c30
-rw-r--r--fs/smb/common/smb2pdu.h34
-rw-r--r--fs/smb/server/smb2pdu.c22
-rw-r--r--fs/userfaultfd.c7
-rw-r--r--include/linux/closure.h7
-rw-r--r--include/linux/fscache-cache.h6
-rw-r--r--include/linux/mmzone.h3
-rw-r--r--include/linux/page_ref.h57
-rw-r--r--include/linux/pagemap.h11
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/swap.h3
-rw-r--r--include/linux/tpm.h81
-rw-r--r--include/net/tcx.h13
-rw-r--r--include/trace/events/fscache.h4
-rw-r--r--include/uapi/drm/panthor_drm.h5
-rw-r--r--kernel/bpf/helpers.c99
-rwxr-xr-xlib/build_OID_registry4
-rw-r--r--lib/closure.c3
-rw-r--r--mm/damon/core.c23
-rw-r--r--mm/filemap.c20
-rw-r--r--mm/gup.c291
-rw-r--r--mm/huge_memory.c2
-rw-r--r--mm/hugetlb.c70
-rw-r--r--mm/hugetlb_vmemmap.c16
-rw-r--r--mm/internal.h4
-rw-r--r--mm/memcontrol.c11
-rw-r--r--mm/migrate.c13
-rw-r--r--mm/readahead.c8
-rw-r--r--mm/shmem.c15
-rw-r--r--mm/vmalloc.c10
-rw-r--r--mm/workingset.c14
-rw-r--r--net/core/datagram.c3
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/ethtool/linkstate.c41
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/netfilter/nf_tables_api.c158
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/sched/act_ct.c8
-rw-r--r--net/sched/sch_ingress.c12
-rw-r--r--net/sunrpc/xprtsock.c7
-rw-r--r--security/integrity/ima/ima_fs.c3
-rw-r--r--tools/perf/util/comm.c29
-rw-r--r--tools/perf/util/dsos.c26
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c91
-rw-r--r--tools/testing/selftests/bpf/progs/timer_lockup.c87
-rw-r--r--tools/testing/selftests/powerpc/flags.mk5
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/timens/exec.c6
-rw-r--r--tools/testing/selftests/timens/timer.c2
-rw-r--r--tools/testing/selftests/timens/timerfd.c2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c4
-rw-r--r--tools/testing/selftests/vDSO/Makefile29
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c16
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c18
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile8
195 files changed, 2210 insertions, 1334 deletions
diff --git a/.mailmap b/.mailmap
index a6c619e22efc..81ac1e17ac3c 100644
--- a/.mailmap
+++ b/.mailmap
@@ -384,6 +384,7 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
+Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>
Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
diff --git a/CREDITS b/CREDITS
index 1a1a54555e11..f87c0fa62cfc 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3150,9 +3150,11 @@ S: Triftstra=DFe 55
S: 13353 Berlin
S: Germany
-N: Gustavo Pimental
+N: Gustavo Pimentel
E: gustavo.pimentel@synopsys.com
D: PCI driver for Synopsys DesignWare
+D: Synopsys DesignWare eDMA driver
+D: Synopsys DesignWare xData traffic generator
N: Emanuel Pirker
E: epirker@edu.uni-klu.ac.at
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
index 1c0ca06b6c97..8c48bcff3df9 100644
--- a/Documentation/arch/riscv/cmodx.rst
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -62,10 +62,10 @@ cmodx.c::
printf("Value before cmodx: %d\n", value);
// Call prctl before first fence.i is called inside modify_instruction
- prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
modify_instruction();
// Call prctl after final fence.i is called in process
- prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);
value = get_value();
printf("Value after cmodx: %d\n", value);
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 9232cd7da301..5d0b68f752c0 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -49,7 +49,7 @@ example usage
$ devlink region show [ DEV/REGION ]
$ devlink region del DEV/REGION snapshot SNAPSHOT_ID
$ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ]
- $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length
+ $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH
# Show all of the exposed regions with region sizes:
$ devlink region show
diff --git a/MAINTAINERS b/MAINTAINERS
index e0f28278e504..9540f9290f56 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6239,9 +6239,8 @@ S: Maintained
F: drivers/usb/dwc3/
DESIGNWARE XDATA IP DRIVER
-M: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
L: linux-pci@vger.kernel.org
-S: Maintained
+S: Orphan
F: Documentation/misc-devices/dw-xdata-pcie.rst
F: drivers/misc/dw-xdata-pcie.c
@@ -14475,7 +14474,7 @@ MEMORY MAPPING
M: Andrew Morton <akpm@linux-foundation.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
-R: Lorenzo Stoakes <lstoakes@gmail.com>
+R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
diff --git a/Makefile b/Makefile
index 06aa6402b385..b25b5b44af10 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 10
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d1030bc52564..d283d281d28e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
struct eeh_dev *edev;
struct pci_dev *pdev;
+ struct pci_bus *bus = NULL;
if (pe->type & EEH_PE_PHB)
return pe->phb->bus;
@@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
/* Retrieve the parent PCI bus of first (top) PCI device */
edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+ pci_lock_rescan_remove();
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
- return pdev->bus;
+ bus = pdev->bus;
+ pci_unlock_rescan_remove();
- return NULL;
+ return bus;
}
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4690c219bfa4..63432a33ec49 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -647,8 +647,9 @@ __after_prom_start:
* Note: This process overwrites the OF exception vectors.
*/
LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
- mr. r4,r26 /* In some cases the loader may */
- beq 9f /* have already put us at zero */
+ mr r4,r26 /* Load the virtual source address into r4 */
+ cmpld r3,r4 /* Check if source == dest */
+ beq 9f /* If so skip the copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 85050be08a23..72b12bc10f90 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -27,6 +27,7 @@
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
+#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/svm.h>
@@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
+#ifdef CONFIG_PPC_PSERIES
+ /*
+ * This must be done after other CPUs have shut down, otherwise they
+ * could execute the 'scv' instruction, which is not supported with
+ * reloc disabled (see configure_exceptions()).
+ */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+#endif
+
printk("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 096d09ed89f6..431be156ca9b 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
} else
xics_kexec_teardown_cpu(secondary);
}
-
-void pseries_machine_kexec(struct kimage *image)
-{
- if (firmware_has_feature(FW_FEATURE_SET_MODE))
- pseries_disable_reloc_on_exc();
-
- default_machine_kexec(image);
-}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index bba4ad192b0f..3968a6970fa8 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { }
#endif
extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
-void pseries_machine_kexec(struct kimage *image);
extern void pSeries_final_fixup(void);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 284a6fa04b0c..b10a25325238 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void)
{
void (*ctor)(void *) = get_dtl_cache_ctor();
- dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, ctor);
+ dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -1159,7 +1159,6 @@ define_machine(pseries) {
.machine_check_exception = pSeries_machine_check_exception,
.machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
- .machine_kexec = pseries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ed9cad20c039..3c830a6f7ef4 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void)
for_each_irq_desc(i, desc) {
struct irq_chip *chip;
- int ret;
chip = irq_desc_get_chip(desc);
if (!chip)
continue;
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
+ if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
chip->irq_eoi(&desc->irq_data);
if (chip->irq_mask)
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 0d3f00eb0bae..10e311b2759d 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int graph_idx = 0;
int level = 0;
if (regs) {
@@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
pc = regs->ra;
} else {
fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
if (pc == (unsigned long)ret_from_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 04db1f993c47..bcf41d6e0df0 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
- pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
+ pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 95990461888f..9281063636a7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -427,6 +427,7 @@ struct kvm_vcpu_stat {
u64 instruction_io_other;
u64 instruction_lpsw;
u64 instruction_lpswe;
+ u64 instruction_lpswey;
u64 instruction_pfmf;
u64 instruction_ptff;
u64 instruction_sck;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 82e9631cd9ef..54b5b2565df8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_io_other),
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+ STATS_DESC_COUNTER(VCPU, instruction_lpswey),
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
STATS_DESC_COUNTER(VCPU, instruction_ptff),
STATS_DESC_COUNTER(VCPU, instruction_sck),
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 111eb5c74784..bf8534218af3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base1 = vcpu->arch.sie_block->ipb >> 28;
+ s64 disp1;
+
+ /* The displacement is a 20bit _SIGNED_ value */
+ disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19);
+
+ if (ar)
+ *ar = base1;
+
+ return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+}
+
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
u64 *address1, u64 *address2,
u8 *ar_b1, u8 *ar_b2)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1be19cc9d73c..1a49b89706f8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
return 0;
}
+static int handle_lpswey(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpswey++;
+
+ if (!test_kvm_facility(vcpu->kvm, 193))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ return 0;
+}
+
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 stidp_data = vcpu->kvm->arch.model.cpuid;
@@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
case 0x61:
case 0x62:
return handle_ri(vcpu);
+ case 0x71:
+ return handle_lpswey(vcpu);
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index abb629d7e131..7e3e767ab87d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
@@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val)
static void base_crst_free(unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 08010dbf5e09..df275d554788 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -19,7 +19,7 @@
struct task_struct;
-static inline struct task_struct *get_current(void)
+static __always_inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 326db1c1d5d8..e0dffcc43b9e 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -91,7 +91,7 @@ struct thread_info {
}
/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
+static __always_inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
__asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index bd6a7857ce05..831fa4a12159 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -16,7 +16,6 @@
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/sched.h> /* need_resched() */
-#include <linux/sort.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
@@ -386,25 +385,24 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
}
-static int acpi_cst_latency_cmp(const void *a, const void *b)
+static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length)
{
- const struct acpi_processor_cx *x = a, *y = b;
+ int i, j, k;
- if (!(x->valid && y->valid))
- return 0;
- if (x->latency > y->latency)
- return 1;
- if (x->latency < y->latency)
- return -1;
- return 0;
-}
-static void acpi_cst_latency_swap(void *a, void *b, int n)
-{
- struct acpi_processor_cx *x = a, *y = b;
+ for (i = 1; i < length; i++) {
+ if (!states[i].valid)
+ continue;
- if (!(x->valid && y->valid))
- return;
- swap(x->latency, y->latency);
+ for (j = i - 1, k = i; j >= 0; j--) {
+ if (!states[j].valid)
+ continue;
+
+ if (states[j].latency > states[k].latency)
+ swap(states[j].latency, states[k].latency);
+
+ k = j;
+ }
+ }
}
static int acpi_processor_power_verify(struct acpi_processor *pr)
@@ -449,10 +447,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
if (buggy_latency) {
pr_notice("FW issue: working around C-state latencies out of order\n");
- sort(&pr->power.states[1], max_cstate,
- sizeof(struct acpi_processor_cx),
- acpi_cst_latency_cmp,
- acpi_cst_latency_swap);
+ acpi_cst_latency_sort(&pr->power.states[1], max_cstate);
}
lapic_timer_propagate_broadcast(pr);
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 4c695b0388f3..9bb142c75243 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -16,8 +16,8 @@ tpm-y += eventlog/common.o
tpm-y += eventlog/tpm1.o
tpm-y += eventlog/tpm2.o
tpm-y += tpm-buf.o
+tpm-y += tpm2-sessions.o
-tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o
tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o
tpm-$(CONFIG_EFI) += eventlog/efi.o
tpm-$(CONFIG_OF) += eventlog/of.o
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 907ac9956a78..2281d55df545 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -83,9 +83,6 @@
#define AES_KEY_BYTES AES_KEYSIZE_128
#define AES_KEY_BITS (AES_KEY_BYTES*8)
-static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
- u32 *handle, u8 *name);
-
/*
* This is the structure that carries all the auth information (like
* session handle, nonces, session key and auth) from use to use it is
@@ -148,6 +145,7 @@ struct tpm2_auth {
u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE];
};
+#ifdef CONFIG_TCG_TPM2_HMAC
/*
* Name Size based on TPM algorithm (assumes no hash bigger than 255)
*/
@@ -163,6 +161,226 @@ static u8 name_size(const u8 *name)
return size_map[alg] + 2;
}
+static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ off_t offset = TPM_HEADER_SIZE;
+ u32 tot_len = be32_to_cpu(head->length);
+ u32 val;
+
+ /* we're starting after the header so adjust the length */
+ tot_len -= TPM_HEADER_SIZE;
+
+ /* skip public */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val > tot_len)
+ return -EINVAL;
+ offset += val;
+ /* name */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val != name_size(&buf->data[offset]))
+ return -EINVAL;
+ memcpy(name, &buf->data[offset], val);
+ /* forget the rest */
+ return 0;
+}
+
+static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
+{
+ struct tpm_buf buf;
+ int rc;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, handle);
+ rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
+ if (rc == TPM2_RC_SUCCESS)
+ rc = tpm2_parse_read_public(name, &buf);
+
+ tpm_buf_destroy(&buf);
+
+ return rc;
+}
+#endif /* CONFIG_TCG_TPM2_HMAC */
+
+/**
+ * tpm_buf_append_name() - add a handle area to the buffer
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @handle: The handle to be appended
+ * @name: The name of the handle (may be NULL)
+ *
+ * In order to compute session HMACs, we need to know the names of the
+ * objects pointed to by the handles. For most objects, this is simply
+ * the actual 4 byte handle or an empty buf (in these cases @name
+ * should be NULL) but for volatile objects, permanent objects and NV
+ * areas, the name is defined as the hash (according to the name
+ * algorithm which should be set to sha256) of the public area to
+ * which the two byte algorithm id has been appended. For these
+ * objects, the @name pointer should point to this. If a name is
+ * required but @name is NULL, then TPM2_ReadPublic() will be called
+ * on the handle to obtain the name.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+ u32 handle, u8 *name)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth;
+ int slot;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ tpm_buf_append_u32(buf, handle);
+ /* count the number of handles in the upper bits of flags */
+ buf->handles++;
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4;
+ if (slot >= AUTH_MAX_NAMES) {
+ dev_err(&chip->dev, "TPM: too many handles\n");
+ return;
+ }
+ auth = chip->auth;
+ WARN(auth->session != tpm_buf_length(buf),
+ "name added in wrong place\n");
+ tpm_buf_append_u32(buf, handle);
+ auth->session += 4;
+
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+ if (!name)
+ tpm2_read_public(chip, handle, auth->name[slot]);
+ } else {
+ if (name)
+ dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
+ }
+
+ auth->name_h[slot] = handle;
+ if (name)
+ memcpy(auth->name[slot], name, name_size(name));
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+
+/**
+ * tpm_buf_append_hmac_session() - Append a TPM session element
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @attributes: The session attributes
+ * @passphrase: The session authority (NULL if none)
+ * @passphrase_len: The length of the session authority (0 if none)
+ *
+ * This fills in a session structure in the TPM command buffer, except
+ * for the HMAC which cannot be computed until the command buffer is
+ * complete. The type of session is controlled by the @attributes,
+ * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
+ * session won't terminate after tpm_buf_check_hmac_response(),
+ * TPM2_SA_DECRYPT which means this buffers first parameter should be
+ * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
+ * response buffer's first parameter needs to be decrypted (confusing,
+ * but the defines are written from the point of view of the TPM).
+ *
+ * Any session appended by this command must be finalized by calling
+ * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
+ * and the TPM will reject the command.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase,
+ int passphrase_len)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ u8 nonce[SHA256_DIGEST_SIZE];
+ struct tpm2_auth *auth;
+ u32 len;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ /* offset tells us where the sessions area begins */
+ int offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ u32 len = 9 + passphrase_len;
+
+ if (tpm_buf_length(buf) != offset) {
+ /* not the first session so update the existing length */
+ len += get_unaligned_be32(&buf->data[offset]);
+ put_unaligned_be32(len, &buf->data[offset]);
+ } else {
+ tpm_buf_append_u32(buf, len);
+ }
+ /* auth handle */
+ tpm_buf_append_u32(buf, TPM2_RS_PW);
+ /* nonce */
+ tpm_buf_append_u16(buf, 0);
+ /* attributes */
+ tpm_buf_append_u8(buf, 0);
+ /* passphrase */
+ tpm_buf_append_u16(buf, passphrase_len);
+ tpm_buf_append(buf, passphrase, passphrase_len);
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ /*
+ * The Architecture Guide requires us to strip trailing zeros
+ * before computing the HMAC
+ */
+ while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0')
+ passphrase_len--;
+
+ auth = chip->auth;
+ auth->attrs = attributes;
+ auth->passphrase_len = passphrase_len;
+ if (passphrase_len)
+ memcpy(auth->passphrase, passphrase, passphrase_len);
+
+ if (auth->session != tpm_buf_length(buf)) {
+ /* we're not the first session */
+ len = get_unaligned_be32(&buf->data[auth->session]);
+ if (4 + len + auth->session != tpm_buf_length(buf)) {
+ WARN(1, "session length mismatch, cannot append");
+ return;
+ }
+
+ /* add our new session */
+ len += 9 + 2 * SHA256_DIGEST_SIZE;
+ put_unaligned_be32(len, &buf->data[auth->session]);
+ } else {
+ tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
+ }
+
+ /* random number for our nonce */
+ get_random_bytes(nonce, sizeof(nonce));
+ memcpy(auth->our_nonce, nonce, sizeof(nonce));
+ tpm_buf_append_u32(buf, auth->handle);
+ /* our new nonce */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+ tpm_buf_append_u8(buf, auth->attrs);
+ /* and put a placeholder for the hmac */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
+ u32 *handle, u8 *name);
+
/*
* It turns out the crypto hmac(sha256) is hard for us to consume
* because it assumes a fixed key and the TPM seems to change the key
@@ -344,82 +562,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
}
/**
- * tpm_buf_append_hmac_session() - Append a TPM session element
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @attributes: The session attributes
- * @passphrase: The session authority (NULL if none)
- * @passphrase_len: The length of the session authority (0 if none)
- *
- * This fills in a session structure in the TPM command buffer, except
- * for the HMAC which cannot be computed until the command buffer is
- * complete. The type of session is controlled by the @attributes,
- * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
- * session won't terminate after tpm_buf_check_hmac_response(),
- * TPM2_SA_DECRYPT which means this buffers first parameter should be
- * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
- * response buffer's first parameter needs to be decrypted (confusing,
- * but the defines are written from the point of view of the TPM).
- *
- * Any session appended by this command must be finalized by calling
- * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
- * and the TPM will reject the command.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphrase_len)
-{
- u8 nonce[SHA256_DIGEST_SIZE];
- u32 len;
- struct tpm2_auth *auth = chip->auth;
-
- /*
- * The Architecture Guide requires us to strip trailing zeros
- * before computing the HMAC
- */
- while (passphrase && passphrase_len > 0
- && passphrase[passphrase_len - 1] == '\0')
- passphrase_len--;
-
- auth->attrs = attributes;
- auth->passphrase_len = passphrase_len;
- if (passphrase_len)
- memcpy(auth->passphrase, passphrase, passphrase_len);
-
- if (auth->session != tpm_buf_length(buf)) {
- /* we're not the first session */
- len = get_unaligned_be32(&buf->data[auth->session]);
- if (4 + len + auth->session != tpm_buf_length(buf)) {
- WARN(1, "session length mismatch, cannot append");
- return;
- }
-
- /* add our new session */
- len += 9 + 2 * SHA256_DIGEST_SIZE;
- put_unaligned_be32(len, &buf->data[auth->session]);
- } else {
- tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
- }
-
- /* random number for our nonce */
- get_random_bytes(nonce, sizeof(nonce));
- memcpy(auth->our_nonce, nonce, sizeof(nonce));
- tpm_buf_append_u32(buf, auth->handle);
- /* our new nonce */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
- tpm_buf_append_u8(buf, auth->attrs);
- /* and put a placeholder for the hmac */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
-}
-EXPORT_SYMBOL(tpm_buf_append_hmac_session);
-
-/**
* tpm_buf_fill_hmac_session() - finalize the session HMAC
* @chip: the TPM chip structure
* @buf: The buffer to be appended
@@ -449,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
u8 cphash[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
+ if (!auth)
+ return;
+
/* save the command code in BE format */
auth->ordinal = head->ordinal;
@@ -567,104 +712,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
}
EXPORT_SYMBOL(tpm_buf_fill_hmac_session);
-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
- off_t offset = TPM_HEADER_SIZE;
- u32 tot_len = be32_to_cpu(head->length);
- u32 val;
-
- /* we're starting after the header so adjust the length */
- tot_len -= TPM_HEADER_SIZE;
-
- /* skip public */
- val = tpm_buf_read_u16(buf, &offset);
- if (val > tot_len)
- return -EINVAL;
- offset += val;
- /* name */
- val = tpm_buf_read_u16(buf, &offset);
- if (val != name_size(&buf->data[offset]))
- return -EINVAL;
- memcpy(name, &buf->data[offset], val);
- /* forget the rest */
- return 0;
-}
-
-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
-{
- struct tpm_buf buf;
- int rc;
-
- rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, handle);
- rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
- if (rc == TPM2_RC_SUCCESS)
- rc = tpm2_parse_read_public(name, &buf);
-
- tpm_buf_destroy(&buf);
-
- return rc;
-}
-
-/**
- * tpm_buf_append_name() - add a handle area to the buffer
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @handle: The handle to be appended
- * @name: The name of the handle (may be NULL)
- *
- * In order to compute session HMACs, we need to know the names of the
- * objects pointed to by the handles. For most objects, this is simply
- * the actual 4 byte handle or an empty buf (in these cases @name
- * should be NULL) but for volatile objects, permanent objects and NV
- * areas, the name is defined as the hash (according to the name
- * algorithm which should be set to sha256) of the public area to
- * which the two byte algorithm id has been appended. For these
- * objects, the @name pointer should point to this. If a name is
- * required but @name is NULL, then TPM2_ReadPublic() will be called
- * on the handle to obtain the name.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- enum tpm2_mso_type mso = tpm2_handle_mso(handle);
- struct tpm2_auth *auth = chip->auth;
- int slot;
-
- slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4;
- if (slot >= AUTH_MAX_NAMES) {
- dev_err(&chip->dev, "TPM: too many handles\n");
- return;
- }
- WARN(auth->session != tpm_buf_length(buf),
- "name added in wrong place\n");
- tpm_buf_append_u32(buf, handle);
- auth->session += 4;
-
- if (mso == TPM2_MSO_PERSISTENT ||
- mso == TPM2_MSO_VOLATILE ||
- mso == TPM2_MSO_NVRAM) {
- if (!name)
- tpm2_read_public(chip, handle, auth->name[slot]);
- } else {
- if (name)
- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
- }
-
- auth->name_h[slot] = handle;
- if (name)
- memcpy(auth->name[slot], name, name_size(name));
-}
-EXPORT_SYMBOL(tpm_buf_append_name);
-
/**
* tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness
* @chip: the TPM chip structure
@@ -705,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
u32 cc = be32_to_cpu(auth->ordinal);
int parm_len, len, i, handles;
+ if (!auth)
+ return rc;
+
if (auth->session >= TPM_HEADER_SIZE) {
WARN(1, "tpm session not filled correctly\n");
goto out;
@@ -824,8 +874,13 @@ EXPORT_SYMBOL(tpm_buf_check_hmac_response);
*/
void tpm2_end_auth_session(struct tpm_chip *chip)
{
- tpm2_flush_context(chip, chip->auth->handle);
- memzero_explicit(chip->auth, sizeof(*chip->auth));
+ struct tpm2_auth *auth = chip->auth;
+
+ if (!auth)
+ return;
+
+ tpm2_flush_context(chip, auth->handle);
+ memzero_explicit(auth, sizeof(*auth));
}
EXPORT_SYMBOL(tpm2_end_auth_session);
@@ -907,6 +962,11 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
int rc;
u32 null_key;
+ if (!auth) {
+ dev_warn_once(&chip->dev, "auth session is not active\n");
+ return 0;
+ }
+
rc = tpm2_load_null(chip, &null_key);
if (rc)
goto out;
@@ -1301,3 +1361,4 @@ int tpm2_sessions_init(struct tpm_chip *chip)
return rc;
}
+#endif /* CONFIG_TCG_TPM2_HMAC */
diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
index ba504e19d420..62d876e150e1 100644
--- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
+++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
@@ -29,6 +29,7 @@ static const struct mtk_gate mfg_clks[] = {
static const struct mtk_clk_desc mfg_desc = {
.clks = mfg_clks,
.num_clks = ARRAY_SIZE(mfg_clks),
+ .need_runtime_pm = true,
};
static const struct of_device_id of_match_clk_mt8183_mfg[] = {
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index bd37ab4d1a9b..ba1d1c495bc2 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -496,14 +496,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
}
- devm_pm_runtime_enable(&pdev->dev);
- /*
- * Do a pm_runtime_resume_and_get() to workaround a possible
- * deadlock between clk_register() and the genpd framework.
- */
- r = pm_runtime_resume_and_get(&pdev->dev);
- if (r)
- return r;
+ if (mcd->need_runtime_pm) {
+ devm_pm_runtime_enable(&pdev->dev);
+ /*
+ * Do a pm_runtime_resume_and_get() to workaround a possible
+ * deadlock between clk_register() and the genpd framework.
+ */
+ r = pm_runtime_resume_and_get(&pdev->dev);
+ if (r)
+ return r;
+ }
/* Calculate how many clk_hw_onecell_data entries to allocate */
num_clks = mcd->num_clks + mcd->num_composite_clks;
@@ -585,7 +587,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
goto unregister_clks;
}
- pm_runtime_put(&pdev->dev);
+ if (mcd->need_runtime_pm)
+ pm_runtime_put(&pdev->dev);
return r;
@@ -618,7 +621,8 @@ free_base:
if (mcd->shared_io && base)
iounmap(base);
- pm_runtime_put(&pdev->dev);
+ if (mcd->need_runtime_pm)
+ pm_runtime_put(&pdev->dev);
return r;
}
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index 22096501a60a..c17fe1c2d732 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -237,6 +237,8 @@ struct mtk_clk_desc {
int (*clk_notifier_func)(struct device *dev, struct clk *clk);
unsigned int mfg_clk_idx;
+
+ bool need_runtime_pm;
};
int mtk_clk_pdev_probe(struct platform_device *pdev);
diff --git a/drivers/clk/qcom/apss-ipq-pll.c b/drivers/clk/qcom/apss-ipq-pll.c
index 5f7f537e4ecb..e8632db2c542 100644
--- a/drivers/clk/qcom/apss-ipq-pll.c
+++ b/drivers/clk/qcom/apss-ipq-pll.c
@@ -70,7 +70,6 @@ static struct clk_alpha_pll ipq_pll_stromer_plus = {
static const struct alpha_pll_config ipq5018_pll_config = {
.l = 0x2a,
.config_ctl_val = 0x4001075b,
- .config_ctl_hi_val = 0x304,
.main_output_mask = BIT(0),
.aux_output_mask = BIT(1),
.early_output_mask = BIT(3),
@@ -84,7 +83,6 @@ static const struct alpha_pll_config ipq5018_pll_config = {
static const struct alpha_pll_config ipq5332_pll_config = {
.l = 0x2d,
.config_ctl_val = 0x4001075b,
- .config_ctl_hi_val = 0x304,
.main_output_mask = BIT(0),
.aux_output_mask = BIT(1),
.early_output_mask = BIT(3),
diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index d4227909d1fe..c51647e37df8 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -2574,6 +2574,9 @@ static int clk_alpha_pll_stromer_plus_set_rate(struct clk_hw *hw,
regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll),
a >> ALPHA_BITWIDTH);
+ regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll),
+ PLL_ALPHA_EN, PLL_ALPHA_EN);
+
regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL);
/* Wait five micro seconds or more */
diff --git a/drivers/clk/qcom/gcc-ipq9574.c b/drivers/clk/qcom/gcc-ipq9574.c
index 0a3f846695b8..f8b9a1e93bef 100644
--- a/drivers/clk/qcom/gcc-ipq9574.c
+++ b/drivers/clk/qcom/gcc-ipq9574.c
@@ -2140,9 +2140,10 @@ static struct clk_rcg2 pcnoc_bfdcd_clk_src = {
static struct clk_branch gcc_crypto_axi_clk = {
.halt_reg = 0x16010,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
- .enable_reg = 0x16010,
- .enable_mask = BIT(0),
+ .enable_reg = 0xb004,
+ .enable_mask = BIT(15),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_crypto_axi_clk",
.parent_hws = (const struct clk_hw *[]) {
@@ -2156,9 +2157,10 @@ static struct clk_branch gcc_crypto_axi_clk = {
static struct clk_branch gcc_crypto_ahb_clk = {
.halt_reg = 0x16014,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
- .enable_reg = 0x16014,
- .enable_mask = BIT(0),
+ .enable_reg = 0xb004,
+ .enable_mask = BIT(16),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_crypto_ahb_clk",
.parent_hws = (const struct clk_hw *[]) {
diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c
index cf4a7b6e0b23..0559a33faf00 100644
--- a/drivers/clk/qcom/gcc-sm6350.c
+++ b/drivers/clk/qcom/gcc-sm6350.c
@@ -100,8 +100,8 @@ static struct clk_alpha_pll gpll6 = {
.enable_mask = BIT(6),
.hw.init = &(struct clk_init_data){
.name = "gpll6",
- .parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ .parent_data = &(const struct clk_parent_data){
+ .fw_name = "bi_tcxo",
},
.num_parents = 1,
.ops = &clk_alpha_pll_fixed_fabia_ops,
@@ -124,7 +124,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_even = {
.clkr.hw.init = &(struct clk_init_data){
.name = "gpll6_out_even",
.parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ &gpll6.clkr.hw,
},
.num_parents = 1,
.ops = &clk_alpha_pll_postdiv_fabia_ops,
@@ -139,8 +139,8 @@ static struct clk_alpha_pll gpll7 = {
.enable_mask = BIT(7),
.hw.init = &(struct clk_init_data){
.name = "gpll7",
- .parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ .parent_data = &(const struct clk_parent_data){
+ .fw_name = "bi_tcxo",
},
.num_parents = 1,
.ops = &clk_alpha_pll_fixed_fabia_ops,
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index ac0091b4ce24..be375ce0149c 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -132,7 +132,6 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
for (i = 0; i < desc->hw_clks->num ; i++) {
struct clk_hw *hw = desc->hw_clks->hws[i];
- struct ccu_common *common = hw_to_ccu_common(hw);
const char *name;
if (!hw)
@@ -147,14 +146,21 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
pr_err("Couldn't register clock %d - %s\n", i, name);
goto err_clk_unreg;
}
+ }
+
+ for (i = 0; i < desc->num_ccu_clks; i++) {
+ struct ccu_common *cclk = desc->ccu_clks[i];
+
+ if (!cclk)
+ continue;
- if (common->max_rate)
- clk_hw_set_rate_range(hw, common->min_rate,
- common->max_rate);
+ if (cclk->max_rate)
+ clk_hw_set_rate_range(&cclk->hw, cclk->min_rate,
+ cclk->max_rate);
else
- WARN(common->min_rate,
+ WARN(cclk->min_rate,
"No max_rate, ignoring min_rate of clock %d - %s\n",
- i, name);
+ i, clk_hw_get_name(&cclk->hw));
}
ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get,
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 37f1cdf46d29..4ac3a35dcd98 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -890,8 +890,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
pr_warn(FW_WARN "P-state 0 is not max freq\n");
- if (acpi_cpufreq_driver.set_boost)
+ if (acpi_cpufreq_driver.set_boost) {
set_boost(policy, acpi_cpufreq_driver.boost_enabled);
+ policy->boost_enabled = acpi_cpufreq_driver.boost_enabled;
+ }
return result;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a45aac17c20f..9e5060b27864 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1431,7 +1431,8 @@ static int cpufreq_online(unsigned int cpu)
}
/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
- policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy);
+ if (cpufreq_boost_enabled() && policy_has_boost_freq(policy))
+ policy->boost_enabled = true;
/*
* The initialization has succeeded and the policy is online.
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 880ffcb50088..921f61507ae8 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -101,8 +101,10 @@ static __init struct device *sysfb_parent_dev(const struct screen_info *si)
if (IS_ERR(pdev)) {
return ERR_CAST(pdev);
} else if (pdev) {
- if (!sysfb_pci_dev_is_enabled(pdev))
+ if (!sysfb_pci_dev_is_enabled(pdev)) {
+ pci_dev_put(pdev);
return ERR_PTR(-ENODEV);
+ }
return &pdev->dev;
}
@@ -137,7 +139,7 @@ static __init int sysfb_init(void)
if (compatible) {
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
- goto unlock_mutex;
+ goto put_device;
}
/* if the FB is incompatible, create a legacy framebuffer device */
@@ -155,7 +157,7 @@ static __init int sysfb_init(void)
pd = platform_device_alloc(name, 0);
if (!pd) {
ret = -ENOMEM;
- goto unlock_mutex;
+ goto put_device;
}
pd->dev.parent = parent;
@@ -170,9 +172,11 @@ static __init int sysfb_init(void)
if (ret)
goto err;
- goto unlock_mutex;
+ goto put_device;
err:
platform_device_put(pd);
+put_device:
+ put_device(parent);
unlock_mutex:
mutex_unlock(&disable_lock);
return ret;
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index 71e1af7c2184..d89e78f0ead3 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -619,8 +619,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
ret = gpiochip_get_ngpios(gc, dev);
if (ret)
gc->ngpio = gc->bgpio_bits;
- else
- gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8));
ret = bgpio_setup_io(gc, dat, set, clr, flags);
if (ret)
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d75f6ee37028..89d5e64cf68b 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -203,6 +203,24 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
*/
{ "qi,lb60", "rb-gpios", true },
#endif
+#if IS_ENABLED(CONFIG_PCI_LANTIQ)
+ /*
+ * According to the PCI specification, the RST# pin is an
+ * active-low signal. However, most of the device trees that
+ * have been widely used for a long time incorrectly describe
+ * reset GPIO as active-high, and were also using wrong name
+ * for the property.
+ */
+ { "lantiq,pci-xway", "gpio-reset", false },
+#endif
+#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005)
+ /*
+ * DTS for Nokia N900 incorrectly specified "active high"
+ * polarity for the reset line, while the chip actually
+ * treats it as "active low".
+ */
+ { "ti,tsc2005", "reset-gpios", false },
+#endif
};
unsigned int i;
@@ -504,9 +522,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
{ "reset", "reset-n-io", "marvell,nfc-uart" },
{ "reset", "reset-n-io", "mrvl,nfc-uart" },
#endif
-#if !IS_ENABLED(CONFIG_PCI_LANTIQ)
+#if IS_ENABLED(CONFIG_PCI_LANTIQ)
/* MIPS Lantiq PCI */
- { "reset", "gpios-reset", "lantiq,pci-xway" },
+ { "reset", "gpio-reset", "lantiq,pci-xway" },
#endif
/*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e9ac20bed0f2..3cdcadd41be1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10048,6 +10048,7 @@ skip_modeset:
}
/* Update Freesync settings. */
+ reset_freesync_config_for_crtc(dm_new_crtc_state);
get_freesync_config_for_crtc(dm_new_crtc_state,
dm_new_conn_state);
@@ -11181,6 +11182,49 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector,
return ret;
}
+static void parse_edid_displayid_vrr(struct drm_connector *connector,
+ struct edid *edid)
+{
+ u8 *edid_ext = NULL;
+ int i;
+ int j = 0;
+ u16 min_vfreq;
+ u16 max_vfreq;
+
+ if (edid == NULL || edid->extensions == 0)
+ return;
+
+ /* Find DisplayID extension */
+ for (i = 0; i < edid->extensions; i++) {
+ edid_ext = (void *)(edid + (i + 1));
+ if (edid_ext[0] == DISPLAYID_EXT)
+ break;
+ }
+
+ if (edid_ext == NULL)
+ return;
+
+ while (j < EDID_LENGTH) {
+ /* Get dynamic video timing range from DisplayID if available */
+ if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 &&
+ (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) {
+ min_vfreq = edid_ext[j+9];
+ if (edid_ext[j+1] & 7)
+ max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8);
+ else
+ max_vfreq = edid_ext[j+10];
+
+ if (max_vfreq && min_vfreq) {
+ connector->display_info.monitor_range.max_vfreq = max_vfreq;
+ connector->display_info.monitor_range.min_vfreq = min_vfreq;
+
+ return;
+ }
+ }
+ j++;
+ }
+}
+
static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
@@ -11302,6 +11346,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (!adev->dm.freesync_module)
goto update;
+ /* Some eDP panels only have the refresh rate range info in DisplayID */
+ if ((connector->display_info.monitor_range.min_vfreq == 0 ||
+ connector->display_info.monitor_range.max_vfreq == 0))
+ parse_edid_displayid_vrr(connector, edid);
+
if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
sink->sink_signal == SIGNAL_TYPE_EDP)) {
bool edid_check_required = false;
@@ -11309,9 +11358,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (is_dp_capable_without_timing_msa(adev->dm.dc,
amdgpu_dm_connector)) {
if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) {
- freesync_capable = true;
amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+ if (amdgpu_dm_connector->max_vfreq -
+ amdgpu_dm_connector->min_vfreq > 10)
+ freesync_capable = true;
} else {
edid_check_required = edid->version > 1 ||
(edid->version == 1 &&
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 6c84b0fa40f4..0782a34689a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -3364,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&mode_lib->vba.UrgentBurstFactorLumaPre[k],
&mode_lib->vba.UrgentBurstFactorChromaPre[k],
&mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k];
}
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index a41812598ce8..8ecc972dbffd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -234,6 +234,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
+ out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0;
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 0f8b3336e26d..cbd1c1f26b7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -294,7 +294,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000;
context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
- if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported)
+ if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
else
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 571691837200..09cbc3afd6d8 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -734,7 +734,7 @@ struct atom_gpio_pin_lut_v2_1
{
struct atom_common_table_header table_header;
/*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */
- struct atom_gpio_pin_assignment gpio_pin[8];
+ struct atom_gpio_pin_assignment gpio_pin[];
};
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c
index 97e579c33d84..1e200d815e1a 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -84,7 +84,8 @@ static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper,
sizes->surface_width, sizes->surface_height,
sizes->surface_bpp);
- format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth);
+ format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp,
+ sizes->surface_depth);
buffer = drm_client_framebuffer_create(client, sizes->surface_width,
sizes->surface_height, format);
if (IS_ERR(buffer))
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 2166208a961d..3860a8ce1e2d 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -420,13 +420,20 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"),
},
.driver_data = (void *)&lcd1280x1920_rightside_up,
- }, { /* Valve Steam Deck */
+ }, { /* Valve Steam Deck (Jupiter) */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"),
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
},
.driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /* Valve Steam Deck (Galileo) */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
}, { /* VIOS LTH17 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"),
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 3c3fc53376ce..6bff169fa8d4 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2088,6 +2088,9 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port,
u32 ln0, ln1, pin_assignment;
u8 width;
+ if (DISPLAY_VER(dev_priv) >= 14)
+ return;
+
if (!intel_encoder_is_tc(&dig_port->base) ||
intel_tc_port_in_tbt_alt_mode(dig_port))
return;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 856b3ef5edb8..0c71d761d378 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1001,6 +1001,9 @@ nouveau_connector_get_modes(struct drm_connector *connector)
struct drm_display_mode *mode;
mode = drm_mode_duplicate(dev, nv_connector->native_mode);
+ if (!mode)
+ return 0;
+
drm_mode_probed_add(connector, mode);
ret = 1;
}
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index b8a84f26b3ef..b5e7b919f241 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -86,15 +86,15 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
int ret = 0;
void *out_alloc;
+ if (!in->count)
+ return NULL;
+
/* User stride must be at least the minimum object size, otherwise it might
* lack useful information.
*/
if (in->stride < min_stride)
return ERR_PTR(-EINVAL);
- if (!in->count)
- return NULL;
-
out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL);
if (!out_alloc)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 79ffcbc41d78..9a0ff48f7061 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -459,6 +459,16 @@ struct panthor_queue {
atomic64_t seqno;
/**
+ * @last_fence: Fence of the last submitted job.
+ *
+ * We return this fence when we get an empty command stream.
+ * This way, we are guaranteed that all earlier jobs have completed
+ * when drm_sched_job::s_fence::finished without having to feed
+ * the CS ring buffer with a dummy job that only signals the fence.
+ */
+ struct dma_fence *last_fence;
+
+ /**
* @in_flight_jobs: List containing all in-flight jobs.
*
* Used to keep track and signal panthor_job::done_fence when the
@@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
panthor_kernel_bo_destroy(queue->ringbuf);
panthor_kernel_bo_destroy(queue->iface.mem);
+ /* Release the last_fence we were holding, if any. */
+ dma_fence_put(queue->fence_ctx.last_fence);
+
kfree(queue);
}
@@ -2784,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work)
spin_lock(&queue->fence_ctx.lock);
list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
- if (!job->call_info.size)
- continue;
-
if (syncobj->seqno < job->done_fence->seqno)
break;
@@ -2865,11 +2875,14 @@ queue_run_job(struct drm_sched_job *sched_job)
static_assert(sizeof(call_instrs) % 64 == 0,
"call_instrs is not aligned on a cacheline");
- /* Stream size is zero, nothing to do => return a NULL fence and let
- * drm_sched signal the parent.
+ /* Stream size is zero, nothing to do except making sure all previously
+ * submitted jobs are done before we signal the
+ * drm_sched_job::s_fence::finished fence.
*/
- if (!job->call_info.size)
- return NULL;
+ if (!job->call_info.size) {
+ job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
+ return dma_fence_get(job->done_fence);
+ }
ret = pm_runtime_resume_and_get(ptdev->base.dev);
if (drm_WARN_ON(&ptdev->base, ret))
@@ -2928,6 +2941,10 @@ queue_run_job(struct drm_sched_job *sched_job)
}
}
+ /* Update the last fence. */
+ dma_fence_put(queue->fence_ctx.last_fence);
+ queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
+
done_fence = dma_fence_get(job->done_fence);
out_unlock:
@@ -3378,10 +3395,15 @@ panthor_job_create(struct panthor_file *pfile,
goto err_put_job;
}
- job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
- if (!job->done_fence) {
- ret = -ENOMEM;
- goto err_put_job;
+ /* Empty command streams don't need a fence, they'll pick the one from
+ * the previously submitted job.
+ */
+ if (job->call_info.size) {
+ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+ if (!job->done_fence) {
+ ret = -ENOMEM;
+ goto err_put_job;
+ }
}
ret = drm_sched_job_init(&job->base,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 2ef201a072f1..e66a230331ee 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -642,7 +642,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
if (r)
goto error_unlock;
- if (bo_va->it.start)
+ if (bo_va->it.start && bo_va->bo)
r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource);
error_unlock:
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..2427be8bc97f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -346,6 +346,7 @@ static void ttm_bo_release(struct kref *kref)
if (!dma_resv_test_signaled(bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP) ||
(want_init_on_free() && (bo->ttm != NULL)) ||
+ bo->type == ttm_bo_type_sg ||
!dma_resv_trylock(bo->base.resv)) {
/* The BO is not idle, resurrect it for delayed destroy */
ttm_bo_flush_all_fences(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 577bd7043740..0443e07880a0 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -342,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
else
gt->steering[OADDRM].group_target = 1;
- gt->steering[DSS].instance_target = 0; /* unused */
+ gt->steering[OADDRM].instance_target = 0; /* unused */
}
static void init_steering_sqidi_psmi(struct xe_gt *gt)
@@ -357,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt)
static void init_steering_inst0(struct xe_gt *gt)
{
- gt->steering[DSS].group_target = 0; /* unused */
- gt->steering[DSS].instance_target = 0; /* unused */
+ gt->steering[INSTANCE0].group_target = 0; /* unused */
+ gt->steering[INSTANCE0].instance_target = 0; /* unused */
}
static const struct {
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 65e5a3f4c340..198f5c2189cb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1334,7 +1334,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
GFP_KERNEL, true, 0);
if (IS_ERR(sa_bo)) {
err = PTR_ERR(sa_bo);
- goto err;
+ goto err_bb;
}
ppgtt_ofs = NUM_KERNEL_PDE +
@@ -1385,7 +1385,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
update_idx);
if (IS_ERR(job)) {
err = PTR_ERR(job);
- goto err_bb;
+ goto err_sa;
}
/* Wait on BO move */
@@ -1434,12 +1434,12 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
err_job:
xe_sched_job_put(job);
+err_sa:
+ drm_suballoc_free(sa_bo, NULL);
err_bb:
if (!q)
mutex_unlock(&m->job_mutex);
xe_bb_free(bb, NULL);
-err:
- drm_suballoc_free(sa_bo, NULL);
return ERR_PTR(err);
}
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index a12525b3186b..f448505d5468 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -15,7 +15,6 @@
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/i2c.h>
-#include <linux/timer.h>
#include <linux/completion.h>
#include <linux/platform_device.h>
#include <linux/io.h>
@@ -32,7 +31,6 @@ struct i2c_pnx_mif {
int ret; /* Return value */
int mode; /* Interface mode */
struct completion complete; /* I/O completion */
- struct timer_list timer; /* Timeout */
u8 * buf; /* Data buffer */
int len; /* Length of data buffer */
int order; /* RX Bytes to order via TX */
@@ -117,24 +115,6 @@ static inline int wait_reset(struct i2c_pnx_algo_data *data)
return (timeout <= 0);
}
-static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data)
-{
- struct timer_list *timer = &alg_data->mif.timer;
- unsigned long expires = msecs_to_jiffies(alg_data->timeout);
-
- if (expires <= 1)
- expires = 2;
-
- del_timer_sync(timer);
-
- dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n",
- jiffies, expires);
-
- timer->expires = jiffies + expires;
-
- add_timer(timer);
-}
-
/**
* i2c_pnx_start - start a device
* @slave_addr: slave address
@@ -259,8 +239,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
~(mcntrl_afie | mcntrl_naie | mcntrl_drmie),
I2C_REG_CTL(alg_data));
- del_timer_sync(&alg_data->mif.timer);
-
dev_dbg(&alg_data->adapter.dev,
"%s(): Waking up xfer routine.\n",
__func__);
@@ -276,8 +254,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
~(mcntrl_afie | mcntrl_naie | mcntrl_drmie),
I2C_REG_CTL(alg_data));
- /* Stop timer. */
- del_timer_sync(&alg_data->mif.timer);
dev_dbg(&alg_data->adapter.dev,
"%s(): Waking up xfer routine after zero-xfer.\n",
__func__);
@@ -364,8 +340,6 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data)
mcntrl_drmie | mcntrl_daie);
iowrite32(ctl, I2C_REG_CTL(alg_data));
- /* Kill timer. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
}
}
@@ -400,8 +374,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
mcntrl_drmie);
iowrite32(ctl, I2C_REG_CTL(alg_data));
- /* Stop timer, to prevent timeout. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
} else if (stat & mstatus_nai) {
/* Slave did not acknowledge, generate a STOP */
@@ -419,8 +391,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
/* Our return value. */
alg_data->mif.ret = -EIO;
- /* Stop timer, to prevent timeout. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
} else {
/*
@@ -453,9 +423,8 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void i2c_pnx_timeout(struct timer_list *t)
+static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data)
{
- struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer);
u32 ctl;
dev_err(&alg_data->adapter.dev,
@@ -472,7 +441,6 @@ static void i2c_pnx_timeout(struct timer_list *t)
iowrite32(ctl, I2C_REG_CTL(alg_data));
wait_reset(alg_data);
alg_data->mif.ret = -EIO;
- complete(&alg_data->mif.complete);
}
static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data)
@@ -514,6 +482,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
struct i2c_msg *pmsg;
int rc = 0, completed = 0, i;
struct i2c_pnx_algo_data *alg_data = adap->algo_data;
+ unsigned long time_left;
u32 stat;
dev_dbg(&alg_data->adapter.dev,
@@ -548,7 +517,6 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n",
__func__, alg_data->mif.mode, alg_data->mif.len);
- i2c_pnx_arm_timer(alg_data);
/* initialize the completion var */
init_completion(&alg_data->mif.complete);
@@ -564,7 +532,10 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
break;
/* Wait for completion */
- wait_for_completion(&alg_data->mif.complete);
+ time_left = wait_for_completion_timeout(&alg_data->mif.complete,
+ alg_data->timeout);
+ if (time_left == 0)
+ i2c_pnx_timeout(alg_data);
if (!(rc = alg_data->mif.ret))
completed++;
@@ -653,7 +624,10 @@ static int i2c_pnx_probe(struct platform_device *pdev)
alg_data->adapter.algo_data = alg_data;
alg_data->adapter.nr = pdev->id;
- alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT;
+ alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT);
+ if (alg_data->timeout <= 1)
+ alg_data->timeout = 2;
+
#ifdef CONFIG_OF
alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node);
if (pdev->dev.of_node) {
@@ -673,8 +647,6 @@ static int i2c_pnx_probe(struct platform_device *pdev)
if (IS_ERR(alg_data->clk))
return PTR_ERR(alg_data->clk);
- timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0);
-
snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name),
"%s", pdev->name);
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 02f07b870f10..268949939636 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1047,31 +1047,31 @@ static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset)
return ARRAY_SIZE(lan9303_mib);
}
-static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum)
+static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum)
{
struct lan9303 *chip = ds->priv;
int phy_base = chip->phy_addr_base;
- if (phy == phy_base)
+ if (port == 0)
return lan9303_virt_phy_reg_read(chip, regnum);
- if (phy > phy_base + 2)
+ if (port > 2)
return -ENODEV;
- return chip->ops->phy_read(chip, phy, regnum);
+ return chip->ops->phy_read(chip, phy_base + port, regnum);
}
-static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
+static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum,
u16 val)
{
struct lan9303 *chip = ds->priv;
int phy_base = chip->phy_addr_base;
- if (phy == phy_base)
+ if (port == 0)
return lan9303_virt_phy_reg_write(chip, regnum, val);
- if (phy > phy_base + 2)
+ if (port > 2)
return -ENODEV;
- return chip->ops->phy_write(chip, phy, regnum, val);
+ return chip->ops->phy_write(chip, phy_base + port, regnum, val);
}
static int lan9303_port_enable(struct dsa_switch *ds, int port,
@@ -1099,7 +1099,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port)
vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port);
lan9303_disable_processing_port(chip, port);
- lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN);
+ lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN);
}
static int lan9303_port_bridge_join(struct dsa_switch *ds, int port,
@@ -1374,8 +1374,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = {
static int lan9303_register_switch(struct lan9303 *chip)
{
- int base;
-
chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL);
if (!chip->ds)
return -ENOMEM;
@@ -1385,8 +1383,7 @@ static int lan9303_register_switch(struct lan9303 *chip)
chip->ds->priv = chip;
chip->ds->ops = &lan9303_switch_ops;
chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops;
- base = chip->phy_addr_base;
- chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base);
+ chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0);
return dsa_register_switch(chip->ds);
}
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index a806dadc4196..20c6529ec135 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1380,6 +1380,7 @@ static int bcmasp_probe(struct platform_device *pdev)
dev_err(dev, "Cannot create eth interface %d\n", i);
bcmasp_remove_intfs(priv);
of_node_put(intf_node);
+ ret = -ENOMEM;
goto of_put_exit;
}
list_add_tail(&intf->list, &priv->intfs);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 220d05e2f6fa..80fce0aaad66 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6282,6 +6282,21 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp)
return max_ring;
}
+u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp)
+{
+ u16 i, tbl_size, max_ring = 0;
+ struct bnxt_rss_ctx *rss_ctx;
+
+ tbl_size = bnxt_get_rxfh_indir_size(bp->dev);
+
+ list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) {
+ for (i = 0; i < tbl_size; i++)
+ max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]);
+ }
+
+ return max_ring;
+}
+
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings)
{
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index e46bd11e52b0..3c8826875ceb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2814,6 +2814,7 @@ int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic,
void bnxt_fill_ipv6_mask(__be32 mask[4]);
int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx);
void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx);
+u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp);
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings);
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic);
int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index bf157f6cc042..4d53ec7adc61 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -961,6 +961,12 @@ static int bnxt_set_channels(struct net_device *dev,
return rc;
}
+ if (req_rx_rings < bp->rx_nr_rings &&
+ req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) {
+ netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n");
+ return -EINVAL;
+ }
+
if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
netif_is_rxfh_configured(dev)) {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 2e98a2a0bead..ce227b56cf72 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1109,6 +1109,46 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
}
/**
+ * e1000e_force_smbus - Force interfaces to transition to SMBUS mode.
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already
+ * acquired.
+ *
+ * Return: 0 on success, negative errno on failure.
+ **/
+static s32 e1000e_force_smbus(struct e1000_hw *hw)
+{
+ u16 smb_ctrl = 0;
+ u32 ctrl_ext;
+ s32 ret_val;
+
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ /* Force SMBus mode in the PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl);
+ if (ret_val) {
+ e1000e_enable_phy_retry(hw);
+ return ret_val;
+ }
+
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in the MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
+
+ return 0;
+}
+
+/**
* e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP
* @hw: pointer to the HW structure
* @to_sx: boolean indicating a system power state transition to Sx
@@ -1165,6 +1205,14 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;
+ if (hw->mac.type != e1000_pch_mtp) {
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val) {
+ e_dbg("Failed to force SMBUS: %d\n", ret_val);
+ goto release;
+ }
+ }
+
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
@@ -1225,27 +1273,12 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
}
release:
- /* Switching PHY interface always returns MDI error
- * so disable retry mechanism to avoid wasting time
- */
- e1000e_disable_phy_retry(hw);
-
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val) {
- e1000e_enable_phy_retry(hw);
- hw->phy.ops.release(hw);
- goto out;
+ if (hw->mac.type == e1000_pch_mtp) {
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val)
+ e_dbg("Failed to force SMBUS over MTL system: %d\n",
+ ret_val);
}
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- e1000e_enable_phy_retry(hw);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
hw->phy.ops.release(hw);
out:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 284c3fad5a6e..310513d9321b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -13293,6 +13293,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
bool need_reset;
int i;
+ /* VSI shall be deleted in a moment, block loading new programs */
+ if (prog && test_bit(__I40E_IN_REMOVE, pf->state))
+ return -EINVAL;
+
/* Don't allow frames that span over multiple buffers */
if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
@@ -13301,14 +13305,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
-
if (need_reset)
i40e_prep_for_reset(pf);
- /* VSI shall be deleted in a moment, just return EINVAL */
- if (test_bit(__I40E_IN_REMOVE, pf->state))
- return -EINVAL;
-
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) {
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 5352fee62d2b..0b9982804370 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
if (ch->dma.irq)
free_irq(ch->dma.irq, priv);
if (IS_RX(ch->idx)) {
- int desc;
+ struct ltq_dma_channel *dma = &ch->dma;
- for (desc = 0; desc < LTQ_DESC_NUM; desc++)
+ for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++)
dev_kfree_skb_any(ch->skb[ch->dma.desc]);
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 5a4f774aed64..ac7ee3f3598c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1643,7 +1643,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu,
if (req->ssow > block->lf.max) {
dev_err(&rvu->pdev->dev,
"Func 0x%x: Invalid SSOW req, %d > max %d\n",
- pcifunc, req->sso, block->lf.max);
+ pcifunc, req->ssow, block->lf.max);
return -EINVAL;
}
mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 31aebeb2e285..25989c79c92e 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1524,6 +1524,7 @@ static int mtk_star_probe(struct platform_device *pdev)
{
struct device_node *of_node;
struct mtk_star_priv *priv;
+ struct phy_device *phydev;
struct net_device *ndev;
struct device *dev;
void __iomem *base;
@@ -1649,6 +1650,12 @@ static int mtk_star_probe(struct platform_device *pdev)
netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll);
netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll);
+ phydev = of_phy_find_device(priv->phy_node);
+ if (phydev) {
+ phydev->mac_managed_pm = true;
+ put_device(&phydev->mdio.dev);
+ }
+
return devm_register_netdev(dev, ndev);
}
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 6453c92f0fa7..7fa1820db9cc 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -352,11 +352,11 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
netif_dbg(ks, intr, ks->netdev,
"%s: txspace %d\n", __func__, tx_space);
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
ks->tx_space = tx_space;
if (netif_queue_stopped(ks->netdev))
netif_wake_queue(ks->netdev);
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
}
if (status & IRQ_SPIBEI) {
@@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev)
ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
ks->queued_len = 0;
+ ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
netif_start_queue(ks->netdev);
netif_dbg(ks, ifup, ks->netdev, "network device up\n");
@@ -635,14 +636,14 @@ static void ks8851_set_rx_mode(struct net_device *dev)
/* schedule work to do the actual set of the data if needed */
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) {
memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl));
schedule_work(&ks->rxctrl_work);
}
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
}
static int ks8851_set_mac_address(struct net_device *dev, void *addr)
@@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev,
int ret;
ks->netdev = netdev;
- ks->tx_space = 6144;
ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
ret = PTR_ERR_OR_ZERO(ks->gpio);
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 670c1de966db..3062cc0f9199 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -340,10 +340,10 @@ static void ks8851_tx_work(struct work_struct *work)
tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR);
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
ks->queued_len -= dequeued_len;
ks->tx_space = tx_space;
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
ks8851_unlock_spi(ks, &flags);
}
diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
index a838b61cd844..a35528497a57 100644
--- a/drivers/net/phy/microchip_t1.c
+++ b/drivers/net/phy/microchip_t1.c
@@ -748,7 +748,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev)
ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
lan87xx_cable_test_report_trans(detect));
- return 0;
+ return phy_init_hw(phydev);
}
static int lan87xx_cable_test_get_status(struct phy_device *phydev,
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 0a65b6d690fe..eb9acfcaeb09 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -70,6 +70,7 @@
#define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */
#define PPP_PROTO_LEN 2
+#define PPP_LCP_HDRLEN 4
/*
* An instance of /dev/ppp can be associated with either a ppp
@@ -493,6 +494,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
return ret;
}
+static bool ppp_check_packet(struct sk_buff *skb, size_t count)
+{
+ /* LCP packets must include LCP header which 4 bytes long:
+ * 1-byte code, 1-byte identifier, and 2-byte length.
+ */
+ return get_unaligned_be16(skb->data) != PPP_LCP ||
+ count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
+}
+
static ssize_t ppp_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -515,6 +525,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
kfree_skb(skb);
goto out;
}
+ ret = -EINVAL;
+ if (unlikely(!ppp_check_packet(skb, count))) {
+ kfree_skb(skb);
+ goto out;
+ }
switch (pf->kind) {
case INTERFACE:
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 0ba714ca5185..4b8528206cc8 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -15,8 +15,8 @@ static void swap_endian(u8 *dst, const u8 *src, u8 bits)
if (bits == 32) {
*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
} else if (bits == 128) {
- ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
- ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
+ ((u64 *)dst)[0] = get_unaligned_be64(src);
+ ((u64 *)dst)[1] = get_unaligned_be64(src + 8);
}
}
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index 1ea4f874e367..7eb76724b3ed 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -124,10 +124,10 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
*/
static inline int wg_cpumask_next_online(int *last_cpu)
{
- int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+ int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(cpu_online_mask);
- *last_cpu = cpu;
+ WRITE_ONCE(*last_cpu, cpu);
return cpu;
}
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 0d48e0f4a1ba..26e09c30d596 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -222,7 +222,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer)
{
struct sk_buff *skb;
- if (skb_queue_empty(&peer->staged_packet_queue)) {
+ if (skb_queue_empty_lockless(&peer->staged_packet_queue)) {
skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
GFP_ATOMIC);
if (unlikely(!skb))
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 462375b293e4..c94203ce65bb 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -81,7 +81,8 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent);
/*
* These interrupt controllers abuse interrupt-map for unspeakable
* reasons and rely on the core code to *ignore* it (the drivers do
- * their own parsing of the property).
+ * their own parsing of the property). The PAsemi entry covers a
+ * non-sensical interrupt-map that is better left ignored.
*
* If you think of adding to the list for something *new*, think
* again. There is a high chance that you will be sent back to the
@@ -95,6 +96,7 @@ static const char * const of_irq_imap_abusers[] = {
"fsl,ls1043a-extirq",
"fsl,ls1088a-extirq",
"renesas,rza1-irqc",
+ "pasemi,rootbus",
NULL,
};
@@ -293,20 +295,8 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
imaplen -= imap - oldimap;
pr_debug(" -> imaplen=%d\n", imaplen);
}
- if (!match) {
- if (intc) {
- /*
- * The PASEMI Nemo is a known offender, so
- * let's only warn for anyone else.
- */
- WARN(!IS_ENABLED(CONFIG_PPC_PASEMI),
- "%pOF interrupt-map failed, using interrupt-controller\n",
- ipar);
- return 0;
- }
-
+ if (!match)
goto fail;
- }
/*
* Successfully parsed an interrupt-map translation; copy new
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index 78c490e0505a..0a02e85a8951 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
unsigned long cmask;
u64 oldval, delta;
- if (!rvpmu->ctr_read)
+ if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
return 0;
cmask = riscv_pmu_ctr_get_width_mask(event);
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index a2e4005e1fd0..4e842dcedfba 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -20,6 +20,7 @@
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
#include <linux/soc/andes/irq.h>
+#include <linux/workqueue.h>
#include <asm/errata_list.h>
#include <asm/sbi.h>
@@ -114,7 +115,7 @@ struct sbi_pmu_event_data {
};
};
-static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+static struct sbi_pmu_event_data pmu_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
@@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
};
#define C(x) PERF_COUNT_HW_CACHE_##x
-static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M
},
};
+static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ 0, cmask, 0, edata->event_idx, 0, 0);
+ if (!ret.error) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+ ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ } else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
+ /* This event cannot be monitored by any counter */
+ edata->event_idx = -EINVAL;
+ }
+}
+
+static void pmu_sbi_check_std_events(struct work_struct *work)
+{
+ for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+ pmu_sbi_check_event(&pmu_hw_event_map[i]);
+
+ for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
+ for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
+ for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+ pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
+}
+
+static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);
+
static int pmu_sbi_ctr_get_width(int idx)
{
return pmu_ctr_list[idx].width;
@@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
u64 raw_config_val;
int ret;
+ /*
+ * Ensure we are finished checking standard hardware events for
+ * validity before allowing userspace to configure any events.
+ */
+ flush_work(&check_std_events_work);
+
switch (type) {
case PERF_TYPE_HARDWARE:
if (config >= PERF_COUNT_HW_MAX)
@@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
* which may include counters that are not enabled yet.
*/
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
- 0, pmu->cmask, 0, 0, 0, 0);
+ 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
}
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
@@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ /* Asynchronously check which standard events are available */
+ schedule_work(&check_std_events_work);
+
return 0;
out_unregister:
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 10d0ce6c8342..78a5aac2dcfd 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -3299,6 +3299,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = {
},
.driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART),
},
+ { }
};
static int toshiba_acpi_add(struct acpi_device *acpi_dev)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 6b64af7d4927..1b7561abe05d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4119,8 +4119,6 @@ static int sd_resume(struct device *dev)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
-
if (opal_unlock_from_suspend(sdkp->opal_dev)) {
sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
return -EIO;
@@ -4137,12 +4135,13 @@ static int sd_resume_common(struct device *dev, bool runtime)
if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
return 0;
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
if (!sd_do_start_stop(sdkp->device, runtime)) {
sdkp->suspended = false;
return 0;
}
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
sd_resume(dev);
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 45f04a25255a..1b2345a697c5 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz)
return;
}
+ if (!params->trip_max)
+ return;
+
allocate_power(tz, params->trip_max->temperature);
params->update_cdevs = true;
}
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 1b0ab2790860..ecc748d15eb7 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies);
else if (tz->polling_delay_jiffies)
thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies);
+ else if (tz->temperature == THERMAL_TEMP_INVALID)
+ thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS));
}
static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz)
@@ -482,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz,
thermal_governor_trip_crossed(governor, tz, trip, crossed_up);
}
-static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a,
+static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a,
const struct list_head *b)
{
struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc,
notify_list_node);
struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc,
notify_list_node);
- int ret = tdb->notify_temp - tda->notify_temp;
-
- return ascending ? ret : -ret;
+ return tda->notify_temp - tdb->notify_temp;
}
void __thermal_zone_device_update(struct thermal_zone_device *tz,
@@ -511,7 +511,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
update_temperature(tz);
if (tz->temperature == THERMAL_TEMP_INVALID)
- return;
+ goto monitor;
__thermal_zone_set_trips(tz);
@@ -520,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
for_each_trip_desc(tz, td)
handle_thermal_trip(tz, td, &way_up_list, &way_down_list);
- list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp);
+ list_sort(NULL, &way_up_list, thermal_trip_notify_cmp);
list_for_each_entry(td, &way_up_list, notify_list_node)
thermal_trip_crossed(tz, &td->trip, governor, true);
list_sort(NULL, &way_down_list, thermal_trip_notify_cmp);
- list_for_each_entry(td, &way_down_list, notify_list_node)
+ list_for_each_entry_reverse(td, &way_down_list, notify_list_node)
thermal_trip_crossed(tz, &td->trip, governor, false);
if (governor->manage)
@@ -533,6 +533,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
thermal_debug_update_trip_stats(tz);
+monitor:
monitor_thermal_zone(tz);
}
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 66f67e54e0c8..94eeb4011a48 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -133,6 +133,12 @@ struct thermal_zone_device {
struct thermal_trip_desc trips[] __counted_by(num_trips);
};
+/*
+ * Default delay after a failing thermal zone temperature check before
+ * attempting to check it again.
+ */
+#define THERMAL_RECHECK_DELAY_MS 250
+
/* Default Thermal Governor */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
#define DEFAULT_THERMAL_GOVERNOR "step_wise"
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 8944548c30fa..c532416aec22 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -105,16 +105,15 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
* @hba: per adapter instance
* @req: pointer to the request to be issued
*
- * Return: the hardware queue instance on which the request would
- * be queued.
+ * Return: the hardware queue instance on which the request will be or has
+ * been queued. %NULL if the request has already been freed.
*/
struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba,
struct request *req)
{
- u32 utag = blk_mq_unique_tag(req);
- u32 hwq = blk_mq_unique_tag_to_hwq(utag);
+ struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx);
- return &hba->uhq[hwq];
+ return hctx ? &hba->uhq[hctx->queue_num] : NULL;
}
/**
@@ -515,6 +514,8 @@ int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag)
if (!cmd)
return -EINVAL;
hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+ if (!hwq)
+ return 0;
} else {
hwq = hba->dev_cmd_queue;
}
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 1b65e6ae4137..46433ecf0c4d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -6456,6 +6456,8 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
/* Release cmd in MCQ mode if abort succeeds */
if (is_mcq_enabled(hba) && (*ret == 0)) {
hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+ if (!hwq)
+ return 0;
spin_lock_irqsave(&hwq->cq_lock, flags);
if (ufshcd_cmd_inflight(lrbp->cmd))
ufshcd_release_scsi_cmd(hba, lrbp);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 987c7921affa..ba0ce0075b2f 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1260,7 +1260,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
struct vfio_pci_hot_reset_info hdr;
struct vfio_pci_fill_info fill = {};
bool slot = false;
- int ret, count;
+ int ret, count = 0;
if (copy_from_user(&hdr, arg, minsz))
return -EFAULT;
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 1de9fac3bcf4..658f11aebda1 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -3,6 +3,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@@ -1553,13 +1554,13 @@ err:
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
- struct btree_iter *alloc_iter)
+ struct btree_iter *alloc_iter,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct btree_iter lru_iter;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
- struct bkey_s_c alloc_k, lru_k;
+ struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
+ if (a->fragmentation_lru) {
+ ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
+ a->fragmentation_lru,
+ alloc_k, last_flushed);
+ if (ret)
+ return ret;
+ }
+
if (a->data_type != BCH_DATA_cached)
return 0;
@@ -1597,41 +1606,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}
- lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
- lru_pos(alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]), 0);
- ret = bkey_err(lru_k);
+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
+ alloc_k, last_flushed);
if (ret)
- return ret;
-
- if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
- alloc_key_to_missing_lru_entry,
- "missing lru entry\n"
- " %s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- ret = bch2_lru_set(trans,
- alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]);
- if (ret)
- goto err;
- }
+ goto err;
err:
fsck_err:
- bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 9d3d64746a5b..27d97c22ae27 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
@@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 4321f9fb73bd..6d8b1bc90be0 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
return ret;
}
-static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
-{
- return bpos_eq(l.k->p, r.k->p) &&
- bkey_bytes(l.k) == bkey_bytes(r.k) &&
- !memcmp(l.v, r.v, bkey_val_bytes(l.k));
-}
-
struct extents_to_bp_state {
struct bpos bucket_start;
struct bpos bucket_end;
@@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
- struct bkey_buf tmp;
int ret = 0;
- bch2_bkey_buf_init(&tmp);
-
struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
if (!ca) {
prt_str(&buf, "extent for nonexistent device:bucket ");
@@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
- bch2_bkey_buf_reassemble(&tmp, c, orig_k);
-
- if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
- if (bp.level) {
- bch2_trans_unlock(trans);
- bch2_btree_interior_updates_flush(c);
- }
-
- ret = bch2_btree_write_buffer_flush_sync(trans);
- if (ret)
- goto err;
-
- bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
- ret = -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
+ if (ret)
+ goto err;
goto check_existing_bp;
}
@@ -589,7 +566,6 @@ err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
- bch2_bkey_buf_exit(&tmp, c);
bch2_dev_put(ca);
printbuf_exit(&buf);
return ret;
@@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
!((1U << btree) & btree_interior_mask))
continue;
+ bch2_trans_begin(trans);
+
__for_each_btree_node(trans, iter, btree,
btree == start.btree ? start.pos : POS_MIN,
0, depth, BTREE_ITER_prefetch, b, ret) {
@@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
struct bkey_s_c_backpointer bp,
- struct bpos *last_flushed_pos)
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
- if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
- *last_flushed_pos = bp.k->p;
- ret = bch2_btree_write_buffer_flush_sync(trans) ?:
- -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ if (!k.k) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
+ if (ret)
+ goto out;
- if (fsck_err_on(!k.k, c,
- backpointer_to_missing_ptr,
- "backpointer for missing %s\n %s",
- bp.v->level ? "btree node" : "extent",
- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
- goto out;
+ if (fsck_err(c, backpointer_to_missing_ptr,
+ "backpointer for missing %s\n %s",
+ bp.v->level ? "btree node" : "extent",
+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
+ goto out;
+ }
}
out:
fsck_err:
@@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)
{
- struct bpos last_flushed_pos = SPOS_MAX;
+ struct bkey_buf last_flushed;
- return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_one_backpointer(trans, start, end,
bkey_s_c_to_backpointer(k),
- &last_flushed_pos));
+ &last_flushed));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ return ret;
}
int bch2_check_backpointers_to_extents(struct bch_fs *c)
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index 94a1d1982fa8..587d7318a2e8 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
- u64 packed_max = f->bits_per_field[i]
- ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
+ unsigned packed_bits = min(64, f->bits_per_field[i]);
+ u64 packed_max = packed_bits
+ ? ~((~0ULL << 1) << (packed_bits - 1))
: 0;
prt_printf(err, "field %u too large: %llu + %llu > %llu",
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index fcd43915df07..936357149cf0 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
return bkey_gt(l, r) ? l : r;
}
+static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
+{
+ return bpos_eq(l.k->p, r.k->p) &&
+ bkey_bytes(l.k) == bkey_bytes(r.k) &&
+ !memcmp(l.v, r.v, bkey_val_bytes(l.k));
+}
+
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0e477a926579..7c72a9e6f511 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -903,6 +903,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
percpu_up_read(&c->mark_lock);
+ gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
+
if (fsck_err_on(new.data_type != gc.data_type, c,
alloc_key_data_type_wrong,
"bucket %llu:%llu gen %u has wrong data_type"
@@ -916,23 +918,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
#define copy_bucket_field(_errtype, _f) \
if (fsck_err_on(new._f != gc._f, c, _errtype, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
- ": got %u, should be %u", \
+ ": got %llu, should be %llu", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_type_str(gc.data_type), \
- new._f, gc._f)) \
+ (u64) new._f, (u64) gc._f)) \
new._f = gc._f; \
- copy_bucket_field(alloc_key_gen_wrong,
- gen);
- copy_bucket_field(alloc_key_dirty_sectors_wrong,
- dirty_sectors);
- copy_bucket_field(alloc_key_cached_sectors_wrong,
- cached_sectors);
- copy_bucket_field(alloc_key_stripe_wrong,
- stripe);
- copy_bucket_field(alloc_key_stripe_redundancy_wrong,
- stripe_redundancy);
+ copy_bucket_field(alloc_key_gen_wrong, gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong, stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
+ copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
#undef copy_bucket_field
if (!bch2_alloc_v4_cmp(*old, new))
@@ -946,7 +944,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
a->v = new;
/*
- * The trigger normally makes sure this is set, but we're not running
+ * The trigger normally makes sure these are set, but we're not running
* triggers:
*/
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 75c8a196b3f6..d0e92d948002 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
+#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
return ret;
}
+/**
+ * In check and repair code, when checking references to write buffer btrees we
+ * need to issue a flush before we have a definitive error: this issues a flush
+ * if this is a key we haven't yet checked.
+ */
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf tmp;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
+ bch2_bkey_buf_reassemble(&tmp, c, referring_k);
+
+ if (bkey_is_btree_ptr(referring_k.k)) {
+ bch2_trans_unlock(trans);
+ bch2_btree_interior_updates_flush(c);
+ }
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+ ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+ }
+err:
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index eebcd2b15249..dd5e64218b50 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
+struct bkey_buf;
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
+
struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;
size_t room;
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index 363644451106..0f40b585ce2b 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
{
struct io_timer *ret = NULL;
- spin_lock(&clock->timer_lock);
-
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
-
- spin_unlock(&clock->timer_lock);
-
return ret;
}
@@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
struct io_timer *timer;
unsigned long now = atomic64_add_return(sectors, &clock->now);
+ spin_lock(&clock->timer_lock);
while ((timer = get_expired_timer(clock, now)))
timer->fn(timer);
+ spin_unlock(&clock->timer_lock);
}
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 1a0072eef109..0087b8555ead 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -5,7 +5,9 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
+#include "compress.h"
#include "data_update.h"
+#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
+void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ printbuf_tabstop_push(out, 20);
+ prt_str(out, "rewrite ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "kill ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->kill_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "target:\t");
+ bch2_target_to_text(out, c, data_opts->target);
+ prt_newline(out);
+
+ prt_str(out, "compression:\t");
+ bch2_compression_opt_to_text(out, background_compression(*io_opts));
+ prt_newline(out);
+
+ prt_str(out, "extra replicas:\t");
+ prt_u64(out, data_opts->extra_replicas);
+}
+
+void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
+{
+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
+ prt_newline(out);
+ bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
+}
+
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans,
if (!(durability_have + durability_removing))
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+ if (!m->op.nr_replicas) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_data_update_to_text(&buf, m);
+ WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_data_update_done;
+ goto done;
+ }
+
m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 991095bbd469..8d36365bdea8 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -17,6 +17,9 @@ struct data_update_opts {
unsigned write_flags;
};
+void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_opts *, struct data_update_opts *);
+
struct data_update {
/* extent being updated: */
enum btree_id btree_id;
@@ -27,6 +30,8 @@ struct data_update {
struct bch_write_op op;
};
+void bch2_data_update_to_text(struct printbuf *, struct data_update *);
+
int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index f0d4727c4dc2..ebabab171fe5 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -610,7 +610,7 @@ restart:
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
list_for_each_entry(trans, &c->btree_trans_list, list) {
- if ((ulong) trans < i->iter)
+ if ((ulong) trans <= i->iter)
continue;
i->iter = (ulong) trans;
@@ -832,16 +832,16 @@ static const struct file_operations btree_transaction_stats_op = {
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_trans *trans;
- pid_t iter = 0;
+ ulong iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans <= iter)
continue;
- iter = task->pid;
+ iter = (ulong) trans;
if (!closure_get_not_zero(&trans->ref))
continue;
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index 24840aee335c..795f4fc0bab1 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i)
static inline unsigned eytzinger1_first(unsigned size)
{
- return rounddown_pow_of_two(size);
+ return size ? rounddown_pow_of_two(size) : 0;
}
static inline unsigned eytzinger1_last(unsigned size)
@@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
static inline unsigned eytzinger1_extra(unsigned size)
{
- return (size + 1 - rounddown_pow_of_two(size)) << 1;
+ return size
+ ? (size + 1 - rounddown_pow_of_two(size)) << 1
+ : 0;
}
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index f9c9a95d7d4c..74a1e12a7a14 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
* discard_new_inode() expects it to be set...
*/
inode->v.i_flags |= I_NEW;
+ /*
+ * We don't want bch2_evict_inode() to delete the inode on disk,
+ * we just raced and had another inode in cache. Normally new
+ * inodes don't have nlink == 0 - except tmpfiles do...
+ */
+ set_nlink(&inode->v, 1);
discard_new_inode(&inode->v);
inode = old;
} else {
@@ -2026,6 +2032,8 @@ err_put_super:
__bch2_fs_stop(c);
deactivate_locked_super(sb);
err:
+ if (ret)
+ pr_err("error: %s", bch2_err_str(ret));
/*
* On an inconsistency error in recovery we might see an -EROFS derived
* errorcode (from the journal), but we don't want to return that to
@@ -2065,7 +2073,8 @@ int __init bch2_vfs_init(void)
{
int ret = -ENOMEM;
- bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
+ SLAB_ACCOUNT);
if (!bch2_inode_cache)
goto err;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index c97fa7002b06..ebf39ef72fb2 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -389,7 +389,6 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(trans);
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
@@ -1004,6 +1003,9 @@ get_bio:
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
+ if (flags & BCH_READ_NODECODE)
+ orig->pick = pick;
+
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 13669dd0e375..10b19791ec98 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1095,7 +1095,7 @@ unlock:
return ret;
}
-int bch2_dev_journal_alloc(struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
unsigned nr;
int ret;
@@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
+ ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
err:
bch_err_fn(ca, ret);
return ret;
@@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
if (ca->journal.nr)
continue;
- int ret = bch2_dev_journal_alloc(ca);
+ int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
@@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j)
journal_quiesce(j);
cancel_delayed_work_sync(&j->write_work);
- BUG_ON(!bch2_journal_error(j) &&
- test_bit(JOURNAL_replay_done, &j->flags) &&
- j->last_empty_seq != journal_cur_seq(j));
+ WARN(!bch2_journal_error(j) &&
+ test_bit(JOURNAL_replay_done, &j->flags) &&
+ j->last_empty_seq != journal_cur_seq(j),
+ "journal shutdown error: cur seq %llu but last empty seq %llu",
+ journal_cur_seq(j), j->last_empty_seq);
if (!bch2_journal_error(j))
clear_bit(JOURNAL_running, &j->flags);
@@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
unsigned long now = jiffies;
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
- if (!out->nr_tabstops)
- printbuf_tabstop_push(out, 28);
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 28);
out->atomic++;
rcu_read_lock();
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index fd1f7cdaa8bc..bc6b9c39dcb4 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
-int bch2_dev_journal_alloc(struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *, bool);
int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index db24ce21b2ac..2326e2cb9cd2 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
flags|BCH_VALIDATE_journal);
if (ret == FSCK_DELETED_KEY)
continue;
+ else if (ret)
+ return ret;
k = bkey_next(k);
}
@@ -1762,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush)
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
spin_lock(&j->lock);
- closure_wait(&j->async_wait, cl);
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
spin_unlock(&j->lock);
-
- continue_at(cl, journal_write_preflush, j->wq);
- return;
}
if (w->separate_flush) {
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index a40d116224ed..b12894ef44f3 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = {
NULL
};
+int bch2_lru_check_set(struct btree_trans *trans,
+ u16 lru_id, u64 time,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter lru_iter;
+ struct bkey_s_c lru_k =
+ bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
+ lru_pos(lru_id,
+ bucket_to_u64(referring_k.k->p),
+ time), 0);
+ int ret = bkey_err(lru_k);
+ if (ret)
+ return ret;
+
+ if (lru_k.k->type != KEY_TYPE_set) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed);
+ if (ret)
+ goto err;
+
+ if (fsck_err(c, alloc_key_to_missing_lru_entry,
+ "missing %s lru entry\n"
+ " %s",
+ bch2_lru_types[lru_type(lru_k)],
+ (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
+ ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
+ if (ret)
+ goto err;
+ }
+ }
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &lru_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index bd71ba77de07..ed75bcf59d47 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
+struct bkey_buf;
+int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
+
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 6e477fadaa2a..e714e3bd5bbb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = {
NULL
};
-static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
- struct bch_io_opts *io_opts,
- struct data_update_opts *data_opts)
-{
- printbuf_tabstop_push(out, 20);
- prt_str(out, "rewrite ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
- prt_newline(out);
-
- prt_str(out, "kill ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->kill_ptrs);
- prt_newline(out);
-
- prt_str(out, "target:\t");
- bch2_target_to_text(out, c, data_opts->target);
- prt_newline(out);
-
- prt_str(out, "compression:\t");
- bch2_compression_opt_to_text(out, background_compression(*io_opts));
- prt_newline(out);
-
- prt_str(out, "extra replicas:\t");
- prt_u64(out, data_opts->extra_replicas);
-}
-
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d6f35a99c429..d54121ec093f 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -286,7 +286,8 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, 0) \
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
- x(alloc_key_io_time_bad, 275, 0)
+ x(alloc_key_io_time_bad, 275, 0) \
+ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index fb906467201e..da735608d47c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -563,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
- EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
- free_percpu(c->online_reserved);
+ if (c->online_reserved) {
+ u64 v = percpu_u64_get(c->online_reserved);
+ WARN(v, "online_reserved not 0 at shutdown: %lli", v);
+ free_percpu(c->online_reserved);
+ }
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
@@ -1769,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, true);
bch_err_msg(c, ret, "allocating journal");
if (ret)
goto err;
@@ -1929,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
if (!ca->journal.nr) {
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, false);
bch_err_msg(ca, ret, "allocating journal");
if (ret)
goto err;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f688fab55251..958155cc43a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3553,7 +3553,7 @@ err:
for (int i = 0; i < num_folios; i++) {
if (eb->folios[i]) {
detach_extent_buffer_folio(eb, eb->folios[i]);
- __folio_put(eb->folios[i]);
+ folio_put(eb->folios[i]);
}
}
__free_extent_buffer(eb);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 753db965f7c0..3a2b902b2d1f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10385,7 +10385,7 @@ out_unlock:
out_folios:
for (i = 0; i < nr_folios; i++) {
if (folios[i])
- __folio_put(folios[i]);
+ folio_put(folios[i]);
}
kvfree(folios);
out:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bf0f81d59b6b..39a15cca58ca 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3062,8 +3062,6 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_inherit *inherit,
size_t size)
{
- if (!btrfs_qgroup_enabled(fs_info))
- return 0;
if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
return -EOPNOTSUPP;
if (size < sizeof(*inherit) || size > PAGE_SIZE)
@@ -3085,6 +3083,14 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
return -EINVAL;
/*
+ * Skip the inherit source qgroups check if qgroup is not enabled.
+ * Qgroup can still be later enabled causing problems, but in that case
+ * btrfs_qgroup_inherit() would just ignore those invalid ones.
+ */
+ if (!btrfs_qgroup_enabled(fs_info))
+ return 0;
+
+ /*
* Now check all the remaining qgroups, they should all:
*
* - Exist
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index cf531255ab76..9522a8b79d22 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -441,7 +441,8 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
u32 item_size = btrfs_item_size(leaf, slot);
unsigned long end, ptr;
u64 offset, flags, count;
- int type, ret;
+ int type;
+ int ret = 0;
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
@@ -486,7 +487,11 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
key->objectid, key->offset);
break;
case BTRFS_EXTENT_OWNER_REF_KEY:
- WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+ if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
+ btrfs_err(fs_info,
+ "found extent owner ref without simple quotas enabled");
+ ret = -EINVAL;
+ }
break;
default:
btrfs_err(fs_info, "invalid key type in iref");
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d620323d08ea..ae8c56442549 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -373,11 +373,18 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
* "optimal" chunk size based on the fs size. However when we actually
* allocate the chunk we will strip this down further, making it no more
* than 10% of the disk or 1G, whichever is smaller.
+ *
+ * On the zoned mode, we need to use zone_size (=
+ * data_sinfo->chunk_size) as it is.
*/
data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
- data_chunk_size = min(data_sinfo->chunk_size,
- mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
- data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+ if (!btrfs_is_zoned(fs_info)) {
+ data_chunk_size = min(data_sinfo->chunk_size,
+ mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
+ data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+ } else {
+ data_chunk_size = data_sinfo->chunk_size;
+ }
/*
* Since data allocations immediately use block groups as part of the
@@ -405,6 +412,17 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
avail >>= 3;
else
avail >>= 1;
+
+ /*
+ * On the zoned mode, we always allocate one zone as one chunk.
+ * Returning non-zone size alingned bytes here will result in
+ * less pressure for the async metadata reclaim process, and it
+ * will over-commit too much leading to ENOSPC. Align down to the
+ * zone size to avoid that.
+ */
+ if (btrfs_is_zoned(fs_info))
+ avail = ALIGN_DOWN(avail, fs_info->zone_size);
+
return avail;
}
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index f449f7340aad..9fb06dc16520 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/namei.h>
+#include <trace/events/fscache.h>
#include "internal.h"
/*
@@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache)
}
/*
- * Withdraw volumes.
+ * Withdraw fscache volumes.
+ */
+static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache)
+{
+ struct list_head *cur;
+ struct cachefiles_volume *volume;
+ struct fscache_volume *vcookie;
+
+ _enter("");
+retry:
+ spin_lock(&cache->object_list_lock);
+ list_for_each(cur, &cache->volumes) {
+ volume = list_entry(cur, struct cachefiles_volume, cache_link);
+
+ if (atomic_read(&volume->vcookie->n_accesses) == 0)
+ continue;
+
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ fscache_withdraw_volume(vcookie);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
+ goto retry;
+ }
+ }
+ spin_unlock(&cache->object_list_lock);
+
+ _leave("");
+}
+
+/*
+ * Withdraw cachefiles volumes.
*/
static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
{
_enter("");
for (;;) {
+ struct fscache_volume *vcookie = NULL;
struct cachefiles_volume *volume = NULL;
spin_lock(&cache->object_list_lock);
if (!list_empty(&cache->volumes)) {
volume = list_first_entry(&cache->volumes,
struct cachefiles_volume, cache_link);
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (!vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ cpu_relax();
+ continue;
+ }
list_del_init(&volume->cache_link);
}
spin_unlock(&cache->object_list_lock);
@@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
break;
cachefiles_withdraw_volume(volume);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
}
_leave("");
@@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
pr_info("File cache on %s unregistering\n", fscache->name);
fscache_withdraw_cache(fscache);
+ cachefiles_withdraw_fscache_volumes(cache);
/* we now have to destroy all the active objects pertaining to this
* cache - which we do by passing them off to thread pool to be
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 06cdf1a8a16f..89b11336a836 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
if (cachefiles_in_ondemand_mode(cache)) {
if (!xa_empty(&cache->reqs)) {
- rcu_read_lock();
+ xas_lock(&xas);
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
if (!cachefiles_ondemand_is_reopening_read(req)) {
mask |= EPOLLIN;
break;
}
}
- rcu_read_unlock();
+ xas_unlock(&xas);
}
} else {
if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 6845a90cdfcc..7b99bd98de75 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -48,6 +48,7 @@ enum cachefiles_object_state {
CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */
CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */
CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */
+ CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */
};
struct cachefiles_ondemand_info {
@@ -128,6 +129,7 @@ struct cachefiles_cache {
unsigned long req_id_next;
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
+ u32 msg_id_next;
};
static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
@@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
+CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING);
static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
{
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index bce005f2b456..470c96658385 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
*/
xas_lock(&xas);
- if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ if (test_bit(CACHEFILES_DEAD, &cache->flags) ||
+ cachefiles_ondemand_object_is_dropping(object)) {
xas_unlock(&xas);
ret = -EIO;
goto out;
@@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
smp_mb();
if (opcode == CACHEFILES_OP_CLOSE &&
- !cachefiles_ondemand_object_is_open(object)) {
+ !cachefiles_ondemand_object_is_open(object)) {
WARN_ON_ONCE(object->ondemand->ondemand_id == 0);
xas_unlock(&xas);
ret = -EIO;
goto out;
}
- xas.xa_index = 0;
+ /*
+ * Cyclically find a free xas to avoid msg_id reuse that would
+ * cause the daemon to successfully copen a stale msg_id.
+ */
+ xas.xa_index = cache->msg_id_next;
xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART) {
+ xas.xa_index = 0;
+ xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK);
+ }
if (xas.xa_node == XAS_RESTART)
xas_set_err(&xas, -EBUSY);
+
xas_store(&xas, req);
- xas_clear_mark(&xas, XA_FREE_MARK);
- xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ if (xas_valid(&xas)) {
+ cache->msg_id_next = xas.xa_index + 1;
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ }
xas_unlock(&xas);
} while (xas_nomem(&xas, GFP_KERNEL));
@@ -568,7 +581,8 @@ out:
* If error occurs after creating the anonymous fd,
* cachefiles_ondemand_fd_release() will set object to close.
*/
- if (opcode == CACHEFILES_OP_OPEN)
+ if (opcode == CACHEFILES_OP_OPEN &&
+ !cachefiles_ondemand_object_is_dropping(object))
cachefiles_ondemand_set_object_close(object);
kfree(req);
return ret;
@@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
{
+ unsigned long index;
+ struct cachefiles_req *req;
+ struct cachefiles_cache *cache;
+
+ if (!object->ondemand)
+ return;
+
cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
cachefiles_ondemand_init_close_req, NULL);
+
+ if (!object->ondemand->ondemand_id)
+ return;
+
+ /* Cancel all requests for the object that is being dropped. */
+ cache = object->volume->cache;
+ xa_lock(&cache->reqs);
+ cachefiles_ondemand_set_object_dropping(object);
+ xa_for_each(&cache->reqs, index, req) {
+ if (req->object == object) {
+ req->error = -EIO;
+ complete(&req->done);
+ __xa_erase(&cache->reqs, index);
+ }
+ }
+ xa_unlock(&cache->reqs);
+
+ /* Wait for ondemand_object_worker() to finish to avoid UAF. */
+ cancel_work_sync(&object->ondemand->ondemand_work);
}
int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index 89df0ba8ba5e..781aac4ef274 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie)
void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
{
- fscache_withdraw_volume(volume->vcookie);
cachefiles_set_volume_xattr(volume);
__cachefiles_free_volume(volume);
}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index bcb6173943ee..4dd8a993c60a 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
if (xlen == 0)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen);
if (xlen != tlen) {
- if (xlen < 0)
+ if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(object, file_inode(file), xlen,
cachefiles_trace_getxattr_error);
+ }
if (xlen == -EIO)
cachefiles_io_error_obj(
object,
@@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len);
if (xlen != len) {
if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
cachefiles_trace_getxattr_error);
if (xlen == -EIO)
diff --git a/fs/dcache.c b/fs/dcache.c
index d58dc9e58f3b..4c144519aa70 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -355,7 +355,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL;
- if (flags & DCACHE_LRU_LIST)
+ /*
+ * The negative counter only tracks dentries on the LRU. Don't inc if
+ * d_lru is on another list.
+ */
+ if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative);
}
@@ -1844,9 +1848,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
spin_lock(&dentry->d_lock);
/*
- * Decrement negative dentry count if it was in the LRU list.
+ * The negative counter only tracks dentries on the LRU. Don't dec if
+ * d_lru is on another list.
*/
- if (dentry->d_flags & DCACHE_LRU_LIST)
+ if ((dentry->d_flags &
+ (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_dec(nr_dentry_negative);
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
raw_write_seqcount_begin(&dentry->d_seq);
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 5a400259ae74..9a1a93e3888b 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
- strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
+ strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
if (!strbuf) {
res = -ENOMEM;
diff --git a/fs/locks.c b/fs/locks.c
index c360d1992d21..bdd94c32256f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1367,9 +1367,9 @@ retry:
locks_wake_up_blocks(&left->c);
}
out:
+ trace_posix_lock_inode(inode, request, error);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
- trace_posix_lock_inode(inode, request, error);
/*
* Free any unused locks.
*/
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index d6031acc34f0..a944a0f17b53 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap,
if (!new_de)
goto out_dir;
err = minix_set_link(new_de, new_page, old_inode);
- kunmap(new_page);
- put_page(new_page);
+ unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
inode_set_ctime_current(new_inode);
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a6bb03bea920..4c0401dbbfcf 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- _debug("no unlock");
+ kdebug("no unlock");
else
folio_unlock(folio);
}
@@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl)
struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
- _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
+ kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
if (readahead_count(ractl) == 0)
return;
@@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct folio *sink = NULL;
int ret;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
folio_file_pos(folio), folio_size(folio),
@@ -508,7 +508,7 @@ retry:
have_folio:
*_folio = folio;
- _leave(" = 0");
+ kleave(" = 0");
return 0;
error_put:
@@ -518,7 +518,7 @@ error:
folio_unlock(folio);
folio_put(folio);
}
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
@@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t flen = folio_size(folio);
int ret;
- _enter("%zx @%llx", flen, start);
+ kenter("%zx @%llx", flen, start);
ret = -ENOMEM;
@@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index d583af7a2209..ecbc99ec7d36 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_file_pos(folio);
- _enter("");
+ kenter("");
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
@@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
*/
howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
flen, offset, part, maybe_trouble);
- _debug("howto %u", howto);
+ kdebug("howto %u", howto);
switch (howto) {
case NETFS_JUST_PREFETCH:
ret = netfs_prefetch_for_write(file, folio, offset, part);
if (ret < 0) {
- _debug("prefetch = %zd", ret);
+ kdebug("prefetch = %zd", ret);
goto error_folio_unlock;
}
break;
@@ -418,7 +418,7 @@ out:
}
iocb->ki_pos += written;
- _leave(" = %zd [%zd]", written, ret);
+ kleave(" = %zd [%zd]", written, ret);
return written ? written : ret;
error_folio_unlock:
@@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct netfs_inode *ictx = netfs_inode(inode);
ssize_t ret;
- _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
@@ -529,7 +529,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
vm_fault_t ret = VM_FAULT_RETRY;
int err;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
sb_start_pagefault(inode->i_sb);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 10a1e4da6bda..b6debac6205f 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
if (!orig_count)
return 0; /* Don't update atime */
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 88f2adfab75e..792ef17bae21 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
/* We're going to need a bounce buffer if what we transmit is going to
* be different in some way to the source buffer, e.g. because it gets
@@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
// TODO
- _debug("uw %llx-%llx", start, end);
+ kdebug("uw %llx-%llx", start, end);
wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
iocb->ki_flags & IOCB_DIRECT ?
@@ -96,7 +96,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
- _debug("begin = %zd", ret);
+ kdebug("begin = %zd", ret);
goto out;
}
@@ -143,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t pos = iocb->ki_pos;
unsigned long long end = pos + iov_iter_count(from) - 1;
- _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 9397ed39b0b4..288a73c3072d 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
int n_accesses;
- _enter("{%s,%s}", ops->name, cache->name);
+ kenter("{%s,%s}", ops->name, cache->name);
BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING);
@@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache,
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name);
- _leave(" = 0 [%s]", cache->name);
+ kleave(" = 0 [%s]", cache->name);
return 0;
}
EXPORT_SYMBOL(fscache_add_cache);
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..4d1e8bf4c615 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
{
struct fscache_cookie *cookie;
- _enter("V=%x", volume->debug_id);
+ kenter("V=%x", volume->debug_id);
if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
return NULL;
@@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
trace_fscache_acquire(cookie);
fscache_stat(&fscache_n_acquires_ok);
- _leave(" = c=%08x", cookie->debug_id);
+ kleave(" = c=%08x", cookie->debug_id);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed;
bool need_withdraw = false;
- _enter("");
+ kenter("");
if (!cookie->volume->cache_priv) {
fscache_create_volume(cookie->volume, true);
@@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
if (cookie->state != FSCACHE_COOKIE_STATE_FAILED)
fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT);
need_withdraw = true;
- _leave(" [fail]");
+ kleave(" [fail]");
goto out;
}
@@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
bool queue = false;
int n_active;
- _enter("c=%08x", cookie->debug_id);
+ kenter("c=%08x", cookie->debug_id);
if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
"Trying to use relinquished cookie\n"))
@@ -636,7 +636,7 @@ again:
spin_unlock(&cookie->lock);
if (queue)
fscache_queue_cookie(cookie, fscache_cookie_get_use_work);
- _leave("");
+ kleave("");
}
EXPORT_SYMBOL(__fscache_use_cookie);
@@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
enum fscache_cookie_state state;
bool wake = false;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
again:
spin_lock(&cookie->lock);
@@ -820,7 +820,7 @@ out:
spin_unlock(&cookie->lock);
if (wake)
wake_up_cookie_state(cookie);
- _leave("");
+ kleave("");
}
static void fscache_cookie_worker(struct work_struct *work)
@@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie)
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags);
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_cookies_lru_expired);
- _debug("lru c=%x", cookie->debug_id);
+ kdebug("lru c=%x", cookie->debug_id);
__fscache_withdraw_cookie(cookie);
}
@@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
- _enter("c=%08x{%d},%d",
+ kenter("c=%08x{%d},%d",
cookie->debug_id, atomic_read(&cookie->n_active), retire);
if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
@@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
{
bool is_caching;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
fscache_stat(&fscache_n_invalidates);
@@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */
default:
spin_unlock(&cookie->lock);
- _leave(" [no %u]", cookie->state);
+ kleave(" [no %u]", cookie->state);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
- _leave(" [look %x]", cookie->inval_counter);
+ kleave(" [look %x]", cookie->inval_counter);
return;
case FSCACHE_COOKIE_STATE_ACTIVE:
@@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
if (is_caching)
fscache_queue_cookie(cookie, fscache_cookie_get_inval_work);
- _leave(" [inv]");
+ kleave(" [inv]");
return;
}
}
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..bf4eaeec44fb 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
- _leave(" [broken]");
+ kleave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
@@ -52,7 +52,7 @@ again:
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
- _leave(" [not live]");
+ kleave(" [not live]");
return false;
}
@@ -92,7 +92,7 @@ again:
spin_lock(&cookie->lock);
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -140,7 +140,7 @@ failed:
cres->cache_priv = NULL;
cres->ops = NULL;
fscache_end_cookie_access(cookie, fscache_access_io_not_live);
- _leave(" = -ENOBUFS");
+ kleave(" = -ENOBUFS");
return -ENOBUFS;
}
@@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
if (len == 0)
goto abandon;
- _enter("%llx,%zx", start, len);
+ kenter("%llx,%zx", start, len);
wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS);
if (!wreq)
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index 42e98bb523e3..bf9b33d26e31 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -99,7 +99,7 @@ error_wq:
*/
void __exit fscache_exit(void)
{
- _enter("");
+ kenter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cdf991bdd9de..2e2a405ca9b0 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
return volume;
}
+struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where)
+{
+ int ref;
+
+ if (!__refcount_inc_not_zero(&volume->ref, &ref))
+ return NULL;
+
+ trace_fscache_volume(volume->debug_id, ref + 1, where);
+ return volume;
+}
+EXPORT_SYMBOL(fscache_try_get_volume);
+
static void fscache_see_volume(struct fscache_volume *volume,
enum fscache_volume_trace where)
{
@@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
fscache_see_volume(volume, fscache_volume_new_acquire);
fscache_stat(&fscache_n_volumes);
up_write(&fscache_addremove_sem);
- _leave(" = v=%x", volume->debug_id);
+ kleave(" = v=%x", volume->debug_id);
return volume;
err_vol:
@@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume,
fscache_free_volume(volume);
}
}
+EXPORT_SYMBOL(fscache_put_volume);
/*
* Relinquish a volume representation cookie.
@@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume)
{
int n_accesses;
- _debug("withdraw V=%x", volume->debug_id);
+ kdebug("withdraw V=%x", volume->debug_id);
/* Allow wakeups on dec-to-0 */
n_accesses = atomic_dec_return(&volume->n_accesses);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index acd9ca14e264..21e46bc9aa49 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/*
* main.c
*/
-extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
extern mempool_t netfs_request_pool;
@@ -344,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops;
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
- enum fscache_volume_trace where);
bool fscache_begin_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
@@ -356,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait);
* debug tracing
*/
#define dbgprintk(FMT, ...) \
- printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+ pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_NETFS_DEBUG)
-#define _enter(FMT, ...) \
-do { \
- if (netfs_debug) \
- kenter(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _leave(FMT, ...) \
-do { \
- if (netfs_debug) \
- kleave(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _debug(FMT, ...) \
-do { \
- if (netfs_debug) \
- kdebug(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
/*
* assertions
*/
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..c7576481c321 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
- _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
@@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- _enter("R=%x[%x]{%llx,%lx},%zd",
+ kenter("R=%x[%x]{%llx,%lx},%zd",
rreq->debug_id, subreq->debug_index,
subreq->start, subreq->flags, transferred_or_error);
@@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
struct netfs_inode *ictx = netfs_inode(rreq->inode);
size_t lsize;
- _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
+ kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
if (rreq->origin != NETFS_DIO_READ) {
source = netfs_cache_prepare_read(subreq, rreq->i_size);
@@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
- _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
+ kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
@@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
struct iov_iter io_iter;
int ret;
- _enter("R=%x %llx-%llx",
+ kenter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
@@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
- _debug("submit %llx + %llx >= %llx",
+ kdebug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d21..db824c372842 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -20,10 +20,6 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
-unsigned netfs_debug;
-module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
-
static struct kmem_cache *netfs_request_slab;
static struct kmem_cache *netfs_subrequest_slab;
mempool_t netfs_request_pool;
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 83e644bd518f..172808e83ca8 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -26,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
struct fscache_cookie *cookie = netfs_i_cookie(ictx);
bool need_use = false;
- _enter("");
+ kenter("");
if (!filemap_dirty_folio(mapping, folio))
return false;
@@ -99,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo;
size_t flen = folio_size(folio);
- _enter("{%lx},%zx,%zx", folio->index, offset, length);
+ kenter("{%lx},%zx,%zx", folio->index, offset, length);
if (!folio_test_private(folio))
return;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 426cf87aaf2e..488147439fe0 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
{
struct list_head *next;
- _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+ kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
@@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
unsigned int notes;
int s;
- _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+ kenter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
@@ -409,7 +409,7 @@ reassess_streams:
front = stream->front;
while (front) {
trace_netfs_collect_sreq(wreq, front);
- //_debug("sreq [%x] %llx %zx/%zx",
+ //kdebug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
/* Stall if there may be a discontinuity. */
@@ -598,7 +598,7 @@ reassess_streams:
out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
- _leave(" = %x", notes);
+ kleave(" = %x", notes);
return;
need_retry:
@@ -606,7 +606,7 @@ need_retry:
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
- _debug("retry");
+ kdebug("retry");
netfs_retry_writes(wreq);
goto out;
}
@@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work)
size_t transferred;
int s;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
netfs_see_request(wreq, netfs_rreq_trace_see_work);
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
@@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work)
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
- _debug("finished");
+ kdebug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
struct netfs_io_request *wreq = subreq->rreq;
struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
- _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+ kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index ec6cf8707fb0..d7c971df8866 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
if (IS_ERR(wreq))
return wreq;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
@@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
subreq->max_nr_segs = INT_MAX;
subreq->stream_nr = stream->stream_nr;
- _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
+ kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
@@ -215,7 +215,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
{
struct netfs_io_request *wreq = subreq->rreq;
- _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
+ kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
return netfs_write_subrequest_terminated(subreq, subreq->error, false);
@@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq,
size_t part;
if (!stream->avail) {
- _leave("no write");
+ kleave("no write");
return len;
}
- _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
+ kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
if (subreq && start != subreq->start + subreq->len) {
netfs_issue_write(wreq, stream);
@@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq,
subreq = stream->construct;
part = min(subreq->max_len - subreq->len, len);
- _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
+ kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
subreq->len += part;
subreq->nr_segs++;
@@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
bool to_eof = false, streamw = false;
bool debug = false;
- _enter("");
+ kenter("");
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
@@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (fpos >= i_size) {
/* mmap beyond eof. */
- _debug("beyond eof");
+ kdebug("beyond eof");
folio_start_writeback(folio);
folio_unlock(folio);
wreq->nr_group_rel += netfs_folio_written_back(folio);
@@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
flen -= foff;
- _debug("folio %zx %zx %zx", foff, flen, fsize);
+ kdebug("folio %zx %zx %zx", foff, flen, fsize);
/* Deal with discontinuities in the stream of dirty pages. These can
* arise from a number of sources:
@@ -487,7 +487,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
for (int s = 0; s < NR_IO_STREAMS; s++)
netfs_issue_write(wreq, &wreq->io_streams[s]);
- _leave(" = 0");
+ kleave(" = 0");
return 0;
}
@@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping,
netfs_stat(&netfs_n_wh_writepages);
do {
- _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
+ kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
/* It appears we don't have to handle cyclic writeback wrapping. */
WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
@@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping,
mutex_unlock(&ictx->wb_lock);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
couldnt_start:
netfs_kill_dirty_pages(mapping, wbc, folio);
out:
mutex_unlock(&ictx->wb_lock);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
EXPORT_SYMBOL(netfs_writepages);
@@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
- _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
+ kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
@@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
struct netfs_inode *ictx = netfs_inode(wreq->inode);
int ret;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
if (writethrough_cache)
netfs_write_folio(wreq, wbc, writethrough_cache);
@@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
loff_t start = wreq->start;
int error = 0;
- _enter("%zx", len);
+ kenter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
@@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
while (len) {
// TODO: Prepare content encryption
- _debug("unbuffered %zx", len);
+ kdebug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
@@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
if (list_empty(&upload->subrequests))
netfs_wake_write_collector(wreq, false);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index dddfa604491a..4a29b0138d75 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -383,11 +383,39 @@ found:
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
+ struct folio *folio;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+ de = nilfs_get_folio(dir, 0, &folio);
if (IS_ERR(de))
return NULL;
- return nilfs_next_entry(de);
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
+
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
+ }
+ *foliop = folio;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ folio_release_kmap(folio, de);
+ return NULL;
}
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index f1f2573bb18d..1374635e89fa 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -246,35 +246,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
}
/*
- * Expand the size of a readahead to the size of the rsize, if at least as
- * large as a page, allowing for the possibility that rsize is not pow-2
- * aligned.
- */
-static void cifs_expand_readahead(struct netfs_io_request *rreq)
-{
- unsigned int rsize = rreq->rsize;
- loff_t misalignment, i_size = i_size_read(rreq->inode);
-
- if (rsize < PAGE_SIZE)
- return;
-
- if (rsize < INT_MAX)
- rsize = roundup_pow_of_two(rsize);
- else
- rsize = ((unsigned int)INT_MAX + 1) / 2;
-
- misalignment = rreq->start & (rsize - 1);
- if (misalignment) {
- rreq->start -= misalignment;
- rreq->len += misalignment;
- }
-
- rreq->len = round_up(rreq->len, rsize);
- if (rreq->start < i_size && rreq->len > i_size - rreq->start)
- rreq->len = i_size - rreq->start;
-}
-
-/*
* Completion of a request operation.
*/
static void cifs_rreq_done(struct netfs_io_request *rreq)
@@ -329,7 +300,6 @@ const struct netfs_request_ops cifs_req_ops = {
.init_request = cifs_init_request,
.free_request = cifs_free_request,
.free_subrequest = cifs_free_subrequest,
- .expand_readahead = cifs_expand_readahead,
.clamp_length = cifs_clamp_length,
.issue_read = cifs_req_issue_read,
.done = cifs_rreq_done,
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 8d10be1fe18a..c3ee42188d25 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -917,6 +917,40 @@ struct smb2_query_directory_rsp {
__u8 Buffer[];
} __packed;
+/* DeviceType Flags */
+#define FILE_DEVICE_CD_ROM 0x00000002
+#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003
+#define FILE_DEVICE_DFS 0x00000006
+#define FILE_DEVICE_DISK 0x00000007
+#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008
+#define FILE_DEVICE_FILE_SYSTEM 0x00000009
+#define FILE_DEVICE_NAMED_PIPE 0x00000011
+#define FILE_DEVICE_NETWORK 0x00000012
+#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014
+#define FILE_DEVICE_NULL 0x00000015
+#define FILE_DEVICE_PARALLEL_PORT 0x00000016
+#define FILE_DEVICE_PRINTER 0x00000018
+#define FILE_DEVICE_SERIAL_PORT 0x0000001b
+#define FILE_DEVICE_STREAMS 0x0000001e
+#define FILE_DEVICE_TAPE 0x0000001f
+#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020
+#define FILE_DEVICE_VIRTUAL_DISK 0x00000024
+#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028
+
+/* Device Characteristics */
+#define FILE_REMOVABLE_MEDIA 0x00000001
+#define FILE_READ_ONLY_DEVICE 0x00000002
+#define FILE_FLOPPY_DISKETTE 0x00000004
+#define FILE_WRITE_ONCE_MEDIA 0x00000008
+#define FILE_REMOTE_DEVICE 0x00000010
+#define FILE_DEVICE_IS_MOUNTED 0x00000020
+#define FILE_VIRTUAL_VOLUME 0x00000040
+#define FILE_DEVICE_SECURE_OPEN 0x00000100
+#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000
+#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000
+#define FILE_PORTABLE_DEVICE 0x00004000
+#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000
+
/*
* Maximum number of iovs we need for a set-info request.
* The largest one is rename/hardlink
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index e7e07891781b..840c71c66b30 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2051,15 +2051,22 @@ out_err1:
* @access: file access flags
* @disposition: file disposition flags
* @may_flags: set with MAY_ flags
+ * @is_dir: is creating open flags for directory
*
* Return: file open flags
*/
static int smb2_create_open_flags(bool file_present, __le32 access,
__le32 disposition,
- int *may_flags)
+ int *may_flags,
+ bool is_dir)
{
int oflags = O_NONBLOCK | O_LARGEFILE;
+ if (is_dir) {
+ access &= ~FILE_WRITE_DESIRE_ACCESS_LE;
+ ksmbd_debug(SMB, "Discard write access to a directory\n");
+ }
+
if (access & FILE_READ_DESIRED_ACCESS_LE &&
access & FILE_WRITE_DESIRE_ACCESS_LE) {
oflags |= O_RDWR;
@@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work)
open_flags = smb2_create_open_flags(file_present, daccess,
req->CreateDisposition,
- &may_flags);
+ &may_flags,
+ req->CreateOptions & FILE_DIRECTORY_FILE_LE ||
+ (file_present && S_ISDIR(d_inode(path.dentry)->i_mode)));
if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
if (open_flags & (O_CREAT | O_TRUNC)) {
@@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info = (struct filesystem_device_info *)rsp->Buffer;
- info->DeviceType = cpu_to_le32(stfs.f_type);
- info->DeviceCharacteristics = cpu_to_le32(0x00000020);
+ info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK);
+ info->DeviceCharacteristics =
+ cpu_to_le32(FILE_DEVICE_IS_MOUNTED);
+ if (!test_tree_conn_flag(work->tcon,
+ KSMBD_TREE_CONN_FLAG_WRITABLE))
+ info->DeviceCharacteristics |=
+ cpu_to_le32(FILE_READ_ONLY_DEVICE);
rsp->OutputBufferLength = cpu_to_le32(8);
break;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index eee7320ab0b0..17e409ceaa33 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
goto out;
features = uffdio_api.features;
ret = -EINVAL;
- if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+ if (uffdio_api.api != UFFD_API)
goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
#endif
+
+ ret = -EINVAL;
+ if (features & ~uffdio_api.features)
+ goto err_out;
+
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 59b8c06b11ff..2af44427107d 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -159,6 +159,7 @@ struct closure {
#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
+#define CLOSURE_MAGIC_STACK 0xc05451cc
unsigned int magic;
struct list_head all;
@@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->magic = CLOSURE_MAGIC_STACK;
+#endif
}
static inline void closure_init_stack_release(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->magic = CLOSURE_MAGIC_STACK;
+#endif
}
/**
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index bdf7f3eddf0a..4c91a019972b 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -19,6 +19,7 @@
enum fscache_cache_trace;
enum fscache_cookie_trace;
enum fscache_access_trace;
+enum fscache_volume_trace;
enum fscache_cache_state {
FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */
@@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie);
extern void fscache_io_error(struct fscache_cache *cache);
+extern struct fscache_volume *
+fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
+extern void fscache_put_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
extern void fscache_end_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 586a8f0104d7..1dc6248feb83 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn)
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
+ struct mem_section_usage *usage = READ_ONCE(ms->usage);
- return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
+ return usage ? test_bit(idx, usage->subsection_map) : 0;
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 1acf5bac7f50..8c236c651d1d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- bool ret = atomic_add_unless(&page->_refcount, nr, u);
+ bool ret = false;
+
+ rcu_read_lock();
+ /* avoid writing to the vmemmap area being remapped */
+ if (!page_is_fake_head(page) && page_ref_count(page) != u)
+ ret = atomic_add_unless(&page->_refcount, nr, u);
+ rcu_read_unlock();
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
@@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio)
return folio_ref_add_unless(folio, 1, 0);
}
-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
-{
-#ifdef CONFIG_TINY_RCU
- /*
- * The caller guarantees the folio will not be freed from interrupt
- * context, so (on !SMP) we only need preemption to be disabled
- * and TINY_RCU does that for us.
- */
-# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
- VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
- folio_ref_add(folio, count);
-#else
- if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
- /* Either the folio has been freed, or will be freed. */
- return false;
- }
-#endif
- return true;
-}
-
-/**
- * folio_try_get_rcu - Attempt to increase the refcount on a folio.
- * @folio: The folio.
- *
- * This is a version of folio_try_get() optimised for non-SMP kernels.
- * If you are still holding the rcu_read_lock() after looking up the
- * page and know that the page cannot have its refcount decreased to
- * zero in interrupt context, you can use this instead of folio_try_get().
- *
- * Example users include get_user_pages_fast() (as pages are not unmapped
- * from interrupt context) and the page cache lookups (as pages are not
- * truncated from interrupt context). We also know that pages are not
- * frozen in interrupt context for the purposes of splitting or migration.
- *
- * You can also use this function if you're holding a lock that prevents
- * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_lock or page table lock for page tables. In this case,
- * it will always succeed, and you could have used a plain folio_get(),
- * but it's sometimes more convenient to have a common function called
- * from both locked and RCU-protected contexts.
- *
- * Return: True if the reference count was successfully incremented.
- */
-static inline bool folio_try_get_rcu(struct folio *folio)
+static inline bool folio_ref_try_add(struct folio *folio, int count)
{
- return folio_ref_try_add_rcu(folio, 1);
+ return folio_ref_add_unless(folio, count, 0);
}
static inline int page_ref_freeze(struct page *page, int count)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 59f1df0cde5a..a0a026d2d244 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
-#define MAX_PAGECACHE_ORDER 8
+#define PREFERRED_MAX_PAGECACHE_ORDER 8
#endif
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
* @mapping: The file.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ff5e65a4627..d07d3645d2d5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2198,13 +2198,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
#ifdef CONFIG_MEM_ALLOC_PROFILING
-static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
swap(current->alloc_tag, tag);
return tag;
}
-static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..e685e93ba354 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
}
/* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 21a67dc9efe8..e93ee8d936a9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -490,9 +490,16 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
{
}
#endif
+
+static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
+{
#ifdef CONFIG_TCG_TPM2_HMAC
+ return chip->auth;
+#else
+ return NULL;
+#endif
+}
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
@@ -504,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
u8 *passphrase,
int passphraselen)
{
- tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
- passphraselen);
+ struct tpm_header *head;
+ int offset;
+
+ if (tpm2_chip_auth(chip)) {
+ tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen);
+ } else {
+ offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ head = (struct tpm_header *)buf->data;
+
+ /*
+ * If the only sessions are optional, the command tag must change to
+ * TPM2_ST_NO_SESSIONS.
+ */
+ if (tpm_buf_length(buf) == offset)
+ head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
+ }
}
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
int rc);
@@ -521,56 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_name(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- tpm_buf_append_u32(buf, handle);
- /* count the number of handles in the upper bits of flags */
- buf->handles++;
-}
-static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphraselen)
-{
- /* offset tells us where the sessions area begins */
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- u32 len = 9 + passphraselen;
-
- if (tpm_buf_length(buf) != offset) {
- /* not the first session so update the existing length */
- len += get_unaligned_be32(&buf->data[offset]);
- put_unaligned_be32(len, &buf->data[offset]);
- } else {
- tpm_buf_append_u32(buf, len);
- }
- /* auth handle */
- tpm_buf_append_u32(buf, TPM2_RS_PW);
- /* nonce */
- tpm_buf_append_u16(buf, 0);
- /* attributes */
- tpm_buf_append_u8(buf, 0);
- /* passphrase */
- tpm_buf_append_u16(buf, passphraselen);
- tpm_buf_append(buf, passphrase, passphraselen);
-}
-static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes,
- u8 *passphrase,
- int passphraselen)
-{
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- struct tpm_header *head = (struct tpm_header *) buf->data;
-
- /*
- * if the only sessions are optional, the command tag
- * must change to TPM2_ST_NO_SESSIONS
- */
- if (tpm_buf_length(buf) == offset)
- head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
-}
static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf)
{
diff --git a/include/net/tcx.h b/include/net/tcx.h
index 72a3e75e539f..5ce0ce9e0c02 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -13,7 +13,7 @@ struct mini_Qdisc;
struct tcx_entry {
struct mini_Qdisc __rcu *miniq;
struct bpf_mprog_bundle bundle;
- bool miniq_active;
+ u32 miniq_active;
struct rcu_head rcu;
};
@@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress)
tcx_dec();
}
-static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
- const bool active)
+static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry)
{
ASSERT_RTNL();
- tcx_entry(entry)->miniq_active = active;
+ tcx_entry(entry)->miniq_active++;
+}
+
+static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry)
+{
+ ASSERT_RTNL();
+ tcx_entry(entry)->miniq_active--;
}
static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index a6190aa1b406..f1a73aa83fbb 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -35,12 +35,14 @@ enum fscache_volume_trace {
fscache_volume_get_cookie,
fscache_volume_get_create_work,
fscache_volume_get_hash_collision,
+ fscache_volume_get_withdraw,
fscache_volume_free,
fscache_volume_new_acquire,
fscache_volume_put_cookie,
fscache_volume_put_create_work,
fscache_volume_put_hash_collision,
fscache_volume_put_relinquish,
+ fscache_volume_put_withdraw,
fscache_volume_see_create_work,
fscache_volume_see_hash_wake,
fscache_volume_wait_create_work,
@@ -120,12 +122,14 @@ enum fscache_access_trace {
EM(fscache_volume_get_cookie, "GET cook ") \
EM(fscache_volume_get_create_work, "GET creat") \
EM(fscache_volume_get_hash_collision, "GET hcoll") \
+ EM(fscache_volume_get_withdraw, "GET withd") \
EM(fscache_volume_free, "FREE ") \
EM(fscache_volume_new_acquire, "NEW acq ") \
EM(fscache_volume_put_cookie, "PUT cook ") \
EM(fscache_volume_put_create_work, "PUT creat") \
EM(fscache_volume_put_hash_collision, "PUT hcoll") \
EM(fscache_volume_put_relinquish, "PUT relnq") \
+ EM(fscache_volume_put_withdraw, "PUT withd") \
EM(fscache_volume_see_create_work, "SEE creat") \
EM(fscache_volume_see_hash_wake, "SEE hwake") \
E_(fscache_volume_wait_create_work, "WAIT crea")
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index aaed8e12ad0b..926b1deb1116 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -802,6 +802,9 @@ struct drm_panthor_queue_submit {
* Must be 64-bit/8-byte aligned (the size of a CS instruction)
*
* Can be zero if stream_addr is zero too.
+ *
+ * When the stream size is zero, the queue submit serves as a
+ * synchronization point.
*/
__u32 stream_size;
@@ -822,6 +825,8 @@ struct drm_panthor_queue_submit {
* ensure the GPU doesn't get garbage when reading the indirect command
* stream buffers. If you want the cache flush to happen
* unconditionally, pass a zero here.
+ *
+ * Ignored when stream_size is zero.
*/
__u32 latest_flush;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5241ba671c5a..b5f0adae8293 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1084,7 +1084,10 @@ struct bpf_async_cb {
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ struct work_struct delete_work;
+ };
u64 flags;
};
@@ -1107,6 +1110,7 @@ struct bpf_async_cb {
struct bpf_hrtimer {
struct bpf_async_cb cb;
struct hrtimer timer;
+ atomic_t cancelling;
};
struct bpf_work {
@@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work)
kfree_rcu(w, cb.rcu);
}
+static void bpf_timer_delete_work(struct work_struct *work)
+{
+ struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+
+ /* Cancel the timer and wait for callback to complete if it was running.
+ * If hrtimer_cancel() can be safely called it's safe to call
+ * kfree_rcu(t) right after for both preallocated and non-preallocated
+ * maps. The async->cb = NULL was already done and no code path can see
+ * address 't' anymore. Timer if armed for existing bpf_hrtimer before
+ * bpf_timer_cancel_and_free will have been cancelled.
+ */
+ hrtimer_cancel(&t->timer);
+ kfree_rcu(t, cb.rcu);
+}
+
static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
enum bpf_async_type type)
{
@@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
clockid = flags & (MAX_CLOCKS - 1);
t = (struct bpf_hrtimer *)cb;
+ atomic_set(&t->cancelling, 0);
+ INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
t->timer.function = bpf_timer_cb;
cb->value = (void *)async - map->record->timer_off;
@@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async)
BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
{
- struct bpf_hrtimer *t;
+ struct bpf_hrtimer *t, *cur_t;
+ bool inc = false;
int ret = 0;
if (in_nmi())
@@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
ret = -EINVAL;
goto out;
}
- if (this_cpu_read(hrtimer_running) == t) {
+
+ cur_t = this_cpu_read(hrtimer_running);
+ if (cur_t == t) {
/* If bpf callback_fn is trying to bpf_timer_cancel()
* its own timer the hrtimer_cancel() will deadlock
- * since it waits for callback_fn to finish
+ * since it waits for callback_fn to finish.
*/
ret = -EDEADLK;
goto out;
}
+
+ /* Only account in-flight cancellations when invoked from a timer
+ * callback, since we want to avoid waiting only if other _callbacks_
+ * are waiting on us, to avoid introducing lockups. Non-callback paths
+ * are ok, since nobody would synchronously wait for their completion.
+ */
+ if (!cur_t)
+ goto drop;
+ atomic_inc(&t->cancelling);
+ /* Need full barrier after relaxed atomic_inc */
+ smp_mb__after_atomic();
+ inc = true;
+ if (atomic_read(&cur_t->cancelling)) {
+ /* We're cancelling timer t, while some other timer callback is
+ * attempting to cancel us. In such a case, it might be possible
+ * that timer t belongs to the other callback, or some other
+ * callback waiting upon it (creating transitive dependencies
+ * upon us), and we will enter a deadlock if we continue
+ * cancelling and waiting for it synchronously, since it might
+ * do the same. Bail!
+ */
+ ret = -EDEADLK;
+ goto out;
+ }
+drop:
drop_prog_refcnt(&t->cb);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1467,6 +1516,8 @@ out:
* if it was running.
*/
ret = ret ?: hrtimer_cancel(&t->timer);
+ if (inc)
+ atomic_dec(&t->cancelling);
rcu_read_unlock();
return ret;
}
@@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val)
if (!t)
return;
- /* Cancel the timer and wait for callback to complete if it was running.
- * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
- * right after for both preallocated and non-preallocated maps.
- * The async->cb = NULL was already done and no code path can
- * see address 't' anymore.
- *
- * Check that bpf_map_delete/update_elem() wasn't called from timer
- * callback_fn. In such case don't call hrtimer_cancel() (since it will
- * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
- * return -1). Though callback_fn is still running on this cpu it's
+ /* We check that bpf_map_delete/update_elem() was called from timer
+ * callback_fn. In such case we don't call hrtimer_cancel() (since it
+ * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
+ * just return -1). Though callback_fn is still running on this cpu it's
* safe to do kfree(t) because bpf_timer_cb() read everything it needed
* from 't'. The bpf subprog callback_fn won't be able to access 't',
* since async->cb = NULL was already done. The timer will be
* effectively cancelled because bpf_timer_cb() will return
* HRTIMER_NORESTART.
+ *
+ * However, it is possible the timer callback_fn calling us armed the
+ * timer _before_ calling us, such that failing to cancel it here will
+ * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
+ * Therefore, we _need_ to cancel any outstanding timers before we do
+ * kfree_rcu, even though no more timers can be armed.
+ *
+ * Moreover, we need to schedule work even if timer does not belong to
+ * the calling callback_fn, as on two different CPUs, we can end up in a
+ * situation where both sides run in parallel, try to cancel one
+ * another, and we end up waiting on both sides in hrtimer_cancel
+ * without making forward progress, since timer1 depends on time2
+ * callback to finish, and vice versa.
+ *
+ * CPU 1 (timer1_cb) CPU 2 (timer2_cb)
+ * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1)
+ *
+ * To avoid these issues, punt to workqueue context when we are in a
+ * timer callback.
*/
- if (this_cpu_read(hrtimer_running) != t)
- hrtimer_cancel(&t->timer);
- kfree_rcu(t, cb.rcu);
+ if (this_cpu_read(hrtimer_running))
+ queue_work(system_unbound_wq, &t->cb.delete_work);
+ else
+ bpf_timer_delete_work(&t->cb.delete_work);
}
/* This function is called by map_delete/update_elem for individual element and
diff --git a/lib/build_OID_registry b/lib/build_OID_registry
index 56d8bafeb848..8267e8d71338 100755
--- a/lib/build_OID_registry
+++ b/lib/build_OID_registry
@@ -38,7 +38,9 @@ close IN_FILE || die;
#
open C_FILE, ">$ARGV[1]" or die;
print C_FILE "/*\n";
-print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
+my $scriptname = $0;
+$scriptname =~ s#^\Q$abs_srctree/\E##;
+print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";
print C_FILE " */\n";
#
diff --git a/lib/closure.c b/lib/closure.c
index c971216d9d77..116afae2eed9 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -244,6 +244,9 @@ void closure_debug_destroy(struct closure *cl)
{
unsigned long flags;
+ if (cl->magic == CLOSURE_MAGIC_STACK)
+ return;
+
BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
cl->magic = CLOSURE_MAGIC_DEAD;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6392f1cc97a3..e66823d6b10b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
* access frequencies are similar. This is for minimizing the monitoring
* overhead under the dynamically changeable access pattern. If a merge was
* unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ *
+ * The total number of regions could be higher than the user-defined limit,
+ * max_nr_regions for some cases. For example, the user can update
+ * max_nr_regions to a number that lower than the current number of regions
+ * while DAMON is running. For such a case, repeat merging until the limit is
+ * met while increasing @threshold up to possible maximum level.
*/
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
unsigned long sz_limit)
{
struct damon_target *t;
-
- damon_for_each_target(t, c)
- damon_merge_regions_of(t, threshold, sz_limit);
+ unsigned int nr_regions;
+ unsigned int max_thres;
+
+ max_thres = c->attrs.aggr_interval /
+ (c->attrs.sample_interval ? c->attrs.sample_interval : 1);
+ do {
+ nr_regions = 0;
+ damon_for_each_target(t, c) {
+ damon_merge_regions_of(t, threshold, sz_limit);
+ nr_regions += damon_nr_regions(t);
+ }
+ threshold = max(1, threshold * 2);
+ } while (nr_regions > c->attrs.max_nr_regions &&
+ threshold / 2 < max_thres);
}
/*
diff --git a/mm/filemap.c b/mm/filemap.c
index 876cc64aadd7..657bcd887fdb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1847,7 +1847,7 @@ repeat:
if (!folio || xa_is_value(folio))
goto out;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto repeat;
if (unlikely(folio != xas_reload(&xas))) {
@@ -2001,7 +2001,7 @@ retry:
if (!folio || xa_is_value(folio))
return folio;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto reset;
if (unlikely(folio != xas_reload(xas))) {
@@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (xa_is_value(folio))
goto update_start;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
@@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
break;
if (xa_is_sibling(folio))
break;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
@@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
- if (vm_flags & VM_HUGEPAGE) {
+ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
ra->size = HPAGE_PMD_NR;
@@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return 0;
- ptep = pte_offset_map(vmf->pmd, vmf->address);
+ ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
+ &vmf->ptl);
if (unlikely(!ptep))
return VM_FAULT_NOPAGE;
@@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
continue;
if (folio_test_locked(folio))
continue;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
continue;
/* Has the page moved or been split? */
if (unlikely(folio != xas_reload(xas)))
@@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, first_index);
struct folio *folio;
+ /* Flush stats (and potentially sleep) outside the RCU read section. */
+ mem_cgroup_flush_stats_ratelimited(NULL);
+
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
int order;
@@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
#endif
- if (workingset_test_recent(shadow, true, &workingset))
+ if (workingset_test_recent(shadow, true, &workingset, false))
cs->nr_recently_evicted += nr_pages;
goto resched;
diff --git a/mm/gup.c b/mm/gup.c
index ca0f5cedce9b..f1d6bc06eb52 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -76,7 +76,7 @@ retry:
folio = page_folio(page);
if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
return NULL;
- if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
+ if (unlikely(!folio_ref_try_add(folio, refs)))
return NULL;
/*
@@ -97,95 +97,6 @@ retry:
return folio;
}
-/**
- * try_grab_folio() - Attempt to get or pin a folio.
- * @page: pointer to page to be grabbed
- * @refs: the value to (effectively) add to the folio's refcount
- * @flags: gup flags: these are the FOLL_* flag values.
- *
- * "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
- *
- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
- * same time. (That's true throughout the get_user_pages*() and
- * pin_user_pages*() APIs.) Cases:
- *
- * FOLL_GET: folio's refcount will be incremented by @refs.
- *
- * FOLL_PIN on large folios: folio's refcount will be incremented by
- * @refs, and its pincount will be incremented by @refs.
- *
- * FOLL_PIN on single-page folios: folio's refcount will be incremented by
- * @refs * GUP_PIN_COUNTING_BIAS.
- *
- * Return: The folio containing @page (with refcount appropriately
- * incremented) for success, or NULL upon failure. If neither FOLL_GET
- * nor FOLL_PIN was set, that's considered failure, and furthermore,
- * a likely bug in the caller, so a warning is also emitted.
- */
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
-{
- struct folio *folio;
-
- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
- return NULL;
-
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
- return NULL;
-
- if (flags & FOLL_GET)
- return try_get_folio(page, refs);
-
- /* FOLL_PIN is set */
-
- /*
- * Don't take a pin on the zero page - it's not going anywhere
- * and it is used in a *lot* of places.
- */
- if (is_zero_page(page))
- return page_folio(page);
-
- folio = try_get_folio(page, refs);
- if (!folio)
- return NULL;
-
- /*
- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
- * right zone, so fail and let the caller fall back to the slow
- * path.
- */
- if (unlikely((flags & FOLL_LONGTERM) &&
- !folio_is_longterm_pinnable(folio))) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
- return NULL;
- }
-
- /*
- * When pinning a large folio, use an exact count to track it.
- *
- * However, be sure to *also* increment the normal folio
- * refcount field at least once, so that the folio really
- * is pinned. That's why the refcount from the earlier
- * try_get_folio() is left intact.
- */
- if (folio_test_large(folio))
- atomic_add(refs, &folio->_pincount);
- else
- folio_ref_add(folio,
- refs * (GUP_PIN_COUNTING_BIAS - 1));
- /*
- * Adjust the pincount before re-checking the PTE for changes.
- * This is essentially a smp_mb() and is paired with a memory
- * barrier in folio_try_share_anon_rmap_*().
- */
- smp_mb__after_atomic();
-
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
-
- return folio;
-}
-
static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
{
if (flags & FOLL_PIN) {
@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
}
/**
- * try_grab_page() - elevate a page's refcount by a flag-dependent amount
- * @page: pointer to page to be grabbed
- * @flags: gup flags: these are the FOLL_* flag values.
+ * try_grab_folio() - add a folio's refcount by a flag-dependent amount
+ * @folio: pointer to folio to be grabbed
+ * @refs: the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values
*
* This might not do anything at all, depending on the flags argument.
*
* "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
*
* Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases: please see the try_grab_folio() documentation, with
- * "refs=1".
+ * time.
*
* Return: 0 for success, or if no action was required (if neither FOLL_PIN
* nor FOLL_GET was set, nothing is done). A negative error code for failure:
*
- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not
+ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not
* be grabbed.
+ *
+ * It is called when we have a stable reference for the folio, typically in
+ * GUP slow path.
*/
-int __must_check try_grab_page(struct page *page, unsigned int flags)
+int __must_check try_grab_folio(struct folio *folio, int refs,
+ unsigned int flags)
{
- struct folio *folio = page_folio(page);
-
if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
return -ENOMEM;
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))
return -EREMOTEIO;
if (flags & FOLL_GET)
- folio_ref_inc(folio);
+ folio_ref_add(folio, refs);
else if (flags & FOLL_PIN) {
/*
* Don't take a pin on the zero page - it's not going anywhere
* and it is used in a *lot* of places.
*/
- if (is_zero_page(page))
+ if (is_zero_folio(folio))
return 0;
/*
- * Similar to try_grab_folio(): be sure to *also*
- * increment the normal page refcount field at least once,
+ * Increment the normal page refcount field at least once,
* so that the page really is pinned.
*/
if (folio_test_large(folio)) {
- folio_ref_add(folio, 1);
- atomic_add(1, &folio->_pincount);
+ folio_ref_add(folio, refs);
+ atomic_add(refs, &folio->_pincount);
} else {
- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
}
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
}
return 0;
@@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz,
return nr;
}
+
+/**
+ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
+ * @page: pointer to page to be grabbed
+ * @refs: the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values.
+ *
+ * "grab" names in this file mean, "look at flags to decide whether to use
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
+ *
+ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
+ * same time. (That's true throughout the get_user_pages*() and
+ * pin_user_pages*() APIs.) Cases:
+ *
+ * FOLL_GET: folio's refcount will be incremented by @refs.
+ *
+ * FOLL_PIN on large folios: folio's refcount will be incremented by
+ * @refs, and its pincount will be incremented by @refs.
+ *
+ * FOLL_PIN on single-page folios: folio's refcount will be incremented by
+ * @refs * GUP_PIN_COUNTING_BIAS.
+ *
+ * Return: The folio containing @page (with refcount appropriately
+ * incremented) for success, or NULL upon failure. If neither FOLL_GET
+ * nor FOLL_PIN was set, that's considered failure, and furthermore,
+ * a likely bug in the caller, so a warning is also emitted.
+ *
+ * It uses add ref unless zero to elevate the folio refcount and must be called
+ * in fast path only.
+ */
+static struct folio *try_grab_folio_fast(struct page *page, int refs,
+ unsigned int flags)
+{
+ struct folio *folio;
+
+ /* Raise warn if it is not called in fast GUP */
+ VM_WARN_ON_ONCE(!irqs_disabled());
+
+ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
+ return NULL;
+
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+ return NULL;
+
+ if (flags & FOLL_GET)
+ return try_get_folio(page, refs);
+
+ /* FOLL_PIN is set */
+
+ /*
+ * Don't take a pin on the zero page - it's not going anywhere
+ * and it is used in a *lot* of places.
+ */
+ if (is_zero_page(page))
+ return page_folio(page);
+
+ folio = try_get_folio(page, refs);
+ if (!folio)
+ return NULL;
+
+ /*
+ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+ * right zone, so fail and let the caller fall back to the slow
+ * path.
+ */
+ if (unlikely((flags & FOLL_LONGTERM) &&
+ !folio_is_longterm_pinnable(folio))) {
+ if (!put_devmap_managed_folio_refs(folio, refs))
+ folio_put_refs(folio, refs);
+ return NULL;
+ }
+
+ /*
+ * When pinning a large folio, use an exact count to track it.
+ *
+ * However, be sure to *also* increment the normal folio
+ * refcount field at least once, so that the folio really
+ * is pinned. That's why the refcount from the earlier
+ * try_get_folio() is left intact.
+ */
+ if (folio_test_large(folio))
+ atomic_add(refs, &folio->_pincount);
+ else
+ folio_ref_add(folio,
+ refs * (GUP_PIN_COUNTING_BIAS - 1));
+ /*
+ * Adjust the pincount before re-checking the PTE for changes.
+ * This is essentially a smp_mb() and is paired with a memory
+ * barrier in folio_try_share_anon_rmap_*().
+ */
+ smp_mb__after_atomic();
+
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+
+ return folio;
+}
#endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
#ifdef CONFIG_ARCH_HAS_HUGEPD
@@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
*/
static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,
unsigned long addr, unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
unsigned long pte_end;
struct page *page;
@@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
page = pte_page(pte);
refs = record_subpages(page, sz, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
- if (!folio)
- return 0;
+ if (fast) {
+ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+ } else {
+ folio = page_folio(page);
+ if (try_grab_folio(folio, refs, flags))
+ return 0;
+ }
if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
gup_put_folio(folio, refs, flags);
@@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
unsigned long addr, unsigned int pdshift,
unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
pte_t *ptep;
unsigned long sz = 1UL << hugepd_shift(hugepd);
@@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
- ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);
+ ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr,
+ fast);
if (ret != 1)
return ret;
} while (ptep++, addr = next, addr != end);
@@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
ptep = hugepte_offset(hugepd, addr, pdshift);
ptl = huge_pte_lock(h, vma->vm_mm, ptep);
ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,
- flags, &page, &nr);
+ flags, &page, &nr, false);
spin_unlock(ptl);
if (ret == 1) {
@@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
unsigned long addr, unsigned int pdshift,
unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
return 0;
}
@@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
gup_must_unshare(vma, flags, page))
return ERR_PTR(-EMLINK);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
else
@@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
return ERR_PTR(ret);
@@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
- ret = try_grab_page(page, flags);
+ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (unlikely(ret)) {
page = ERR_PTR(ret);
goto out;
@@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
goto unmap;
*page = pte_page(entry);
}
- ret = try_grab_page(*page, gup_flags);
+ ret = try_grab_folio(page_folio(*page), 1, gup_flags);
if (unlikely(ret))
goto unmap;
out:
@@ -1636,20 +1651,19 @@ next_page:
* pages.
*/
if (page_increm > 1) {
- struct folio *folio;
+ struct folio *folio = page_folio(page);
/*
* Since we already hold refcount on the
* large folio, this should never fail.
*/
- folio = try_grab_folio(page, page_increm - 1,
- foll_flags);
- if (WARN_ON_ONCE(!folio)) {
+ if (try_grab_folio(folio, page_increm - 1,
+ foll_flags)) {
/*
* Release the 1st page ref if the
* folio is problematic, fail hard.
*/
- gup_put_folio(page_folio(page), 1,
+ gup_put_folio(folio, 1,
foll_flags);
ret = -EFAULT;
goto out;
@@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
#ifdef CONFIG_HAVE_GUP_FAST
-
/*
* Used in the GUP-fast path to determine whether GUP is permitted to work on
* a specific folio.
@@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- folio = try_grab_folio(page, 1, flags);
+ folio = try_grab_folio_fast(page, 1, flags);
if (!folio)
goto pte_unmap;
@@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
break;
}
- folio = try_grab_folio(page, 1, flags);
+ folio = try_grab_folio_fast(page, 1, flags);
if (!folio) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
@@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
page = pmd_page(orig);
refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
page = pud_page(orig);
refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr,
page = pgd_page(orig);
refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
* pmd format and THP pmd format
*/
if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,
- PMD_SHIFT, next, flags, pages, nr) != 1)
+ PMD_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
pages, nr))
@@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
return 0;
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,
- PUD_SHIFT, next, flags, pages, nr) != 1)
+ PUD_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
pages, nr))
@@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
BUILD_BUG_ON(p4d_leaf(p4d));
if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,
- P4D_SHIFT, next, flags, pages, nr) != 1)
+ P4D_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
pages, nr))
@@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
return;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, flags, pages, nr) != 1)
+ PGDIR_SHIFT, next, flags, pages, nr,
+ true) != 1)
return;
} else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
pages, nr))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index db7946a0a28c..2120f7478e55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f35abff8be60..43e1af868cfd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
* folio appears as just a compound page. Otherwise, wait until after
* allocating vmemmap to clear the flag.
*
- * A reference is held on the folio, except in the case of demote.
- *
* Must be called with hugetlb lock held.
*/
-static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
- bool adjust_surplus,
- bool demote)
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
+ bool adjust_surplus)
{
int nid = folio_nid(folio);
@@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
list_del(&folio->lru);
if (folio_test_hugetlb_freed(folio)) {
+ folio_clear_hugetlb_freed(folio);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
}
@@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
if (!folio_test_hugetlb_vmemmap_optimized(folio))
__folio_clear_hugetlb(folio);
- /*
- * In the case of demote we do not ref count the page as it will soon
- * be turned into a page of smaller size.
- */
- if (!demote)
- folio_ref_unfreeze(folio, 1);
-
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
}
-static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
- bool adjust_surplus)
-{
- __remove_hugetlb_folio(h, folio, adjust_surplus, false);
-}
-
-static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
- bool adjust_surplus)
-{
- __remove_hugetlb_folio(h, folio, adjust_surplus, true);
-}
-
static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
- int zeroed;
int nid = folio_nid(folio);
VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
@@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
*/
folio_set_hugetlb_vmemmap_optimized(folio);
- /*
- * This folio is about to be managed by the hugetlb allocator and
- * should have no users. Drop our reference, and check for others
- * just in case.
- */
- zeroed = folio_put_testzero(folio);
- if (unlikely(!zeroed))
- /*
- * It is VERY unlikely soneone else has taken a ref
- * on the folio. In this case, we simply return as
- * free_huge_folio() will be called when this other ref
- * is dropped.
- */
- return;
-
arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
}
@@ -1763,13 +1726,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
- * Move PageHWPoison flag from head page to the raw error pages,
- * which makes any healthy subpages reusable.
- */
- if (unlikely(folio_test_hwpoison(folio)))
- folio_clear_hugetlb_hwpoison(folio);
-
- /*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
*/
@@ -1780,6 +1736,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(folio_test_hwpoison(folio)))
+ folio_clear_hugetlb_hwpoison(folio);
+
+ folio_ref_unfreeze(folio, 1);
+
+ /*
* Non-gigantic pages demoted from CMA allocated gigantic pages
* need to be given back to CMA in free_gigantic_folio.
*/
@@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
nid = numa_mem_id();
retry:
folio = __folio_alloc(gfp_mask, order, nid, nmask);
+ /* Ensure hugetlb folio won't have large_rmappable flag set. */
+ if (folio)
+ folio_clear_large_rmappable(folio);
if (folio && !folio_ref_freeze(folio, 1)) {
folio_put(folio);
@@ -3079,11 +3047,8 @@ retry:
free_new:
spin_unlock_irq(&hugetlb_lock);
- if (new_folio) {
- /* Folio has a zero ref count, but needs a ref to be freed */
- folio_ref_unfreeze(new_folio, 1);
+ if (new_folio)
update_and_free_hugetlb_folio(h, new_folio, false);
- }
return ret;
}
@@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
- remove_hugetlb_folio_for_demote(h, folio, false);
+ remove_hugetlb_folio(h, folio, false);
spin_unlock_irq(&hugetlb_lock);
/*
@@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
if (rc) {
/* Allocation of vmemmmap failed, we can not demote folio */
spin_lock_irq(&hugetlb_lock);
- folio_ref_unfreeze(folio, 1);
add_hugetlb_folio(h, folio, false);
return rc;
}
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index b9a55322e52c..8193906515c6 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
if (!folio_test_hugetlb_vmemmap_optimized(folio))
return 0;
@@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
*/
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
{
+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+ synchronize_rcu();
+
return __hugetlb_vmemmap_restore_folio(h, folio, 0);
}
@@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
long restored = 0;
long ret = 0;
+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+ synchronize_rcu();
+
list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
if (folio_test_hugetlb_vmemmap_optimized(folio)) {
ret = __hugetlb_vmemmap_restore_folio(h, folio,
@@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
if (!vmemmap_should_optimize_folio(h, folio))
return ret;
@@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
{
LIST_HEAD(vmemmap_pages);
+ /* avoid writes from page_ref_add_unless() while folding vmemmap */
+ synchronize_rcu();
+
__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
free_vmemmap_page_list(&vmemmap_pages);
}
@@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
flush_tlb_all();
+ /* avoid writes from page_ref_add_unless() while folding vmemmap */
+ synchronize_rcu();
+
list_for_each_entry(folio, folio_list, lru) {
int ret;
diff --git a/mm/internal.h b/mm/internal.h
index 6902b7dd8509..cc2c5e07fad3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1182,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page);
/*
* mm/gup.c
*/
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
-int __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_folio(struct folio *folio, int refs,
+ unsigned int flags);
/*
* mm/huge_memory.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71fe2a95b8bd..8f2f1bb18c9c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7823,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
/* Transfer the charge and the css ref */
commit_charge(new, memcg);
- /*
- * If the old folio is a large folio and is in the split queue, it needs
- * to be removed from the split queue now, in case getting an incorrect
- * split queue in destroy_large_folio() after the memcg of the old folio
- * is cleared.
- *
- * In addition, the old folio is about to be freed after migration, so
- * removing from the split queue a bit earlier seems reasonable.
- */
- if (folio_test_large(old) && folio_test_large_rmappable(old))
- folio_undo_large_rmappable(old);
old->memcg_data = 0;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 20cb9f5f7446..a8c6f466e33a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_ref_count(folio) != expected_count)
return -EAGAIN;
+ /* Take off deferred split queue while frozen and memcg set */
+ if (folio_test_large(folio) &&
+ folio_test_large_rmappable(folio)) {
+ if (!folio_ref_freeze(folio, expected_count))
+ return -EAGAIN;
+ folio_undo_large_rmappable(folio);
+ folio_ref_unfreeze(folio, expected_count);
+ }
+
/* No turning back from here */
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
@@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping,
return -EAGAIN;
}
+ /* Take off deferred split queue while frozen and memcg set */
+ if (folio_test_large(folio) && folio_test_large_rmappable(folio))
+ folio_undo_large_rmappable(folio);
+
/*
* Now we know that no one else is looking at the folio:
* no turning back from here.
diff --git a/mm/readahead.c b/mm/readahead.c
index c1b23989d9ca..817b2a352d78 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
limit = min(limit, index + ra->size - 1);
- if (new_order < MAX_PAGECACHE_ORDER) {
+ if (new_order < MAX_PAGECACHE_ORDER)
new_order += 2;
- new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
- new_order = min_t(unsigned int, new_order, ilog2(ra->size));
- }
+
+ new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+ new_order = min_t(unsigned int, new_order, ilog2(ra->size));
/* See comment in page_cache_ra_unbounded() */
nofs = memalloc_nofs_save();
diff --git a/mm/shmem.c b/mm/shmem.c
index a8b181a63402..c1befe046c7e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- struct mm_struct *mm, unsigned long vm_flags)
+static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
+ bool shmem_huge_force, struct mm_struct *mm,
+ unsigned long vm_flags)
{
loff_t i_size;
@@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
}
}
+bool shmem_is_huge(struct inode *inode, pgoff_t index,
+ bool shmem_huge_force, struct mm_struct *mm,
+ unsigned long vm_flags)
+{
+ if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
+ return false;
+
+ return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags);
+}
+
#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d0cbdd7c1e5b..e34ea860153f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2543,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
static struct xarray *
addr_to_vb_xa(unsigned long addr)
{
- int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();
+ int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
+
+ /*
+ * Please note, nr_cpu_ids points on a highest set
+ * possible bit, i.e. we never invoke cpumask_next()
+ * if an index points on it which is nr_cpu_ids - 1.
+ */
+ if (!cpu_possible(index))
+ index = cpumask_next(index, cpu_possible_mask);
return &per_cpu(vmap_block_queue, index).vmap_blocks;
}
diff --git a/mm/workingset.c b/mm/workingset.c
index c22adb93622a..a2b28e356e68 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
* @file: whether the corresponding folio is from the file lru.
* @workingset: where the workingset value unpacked from shadow should
* be stored.
+ * @flush: whether to flush cgroup rstat.
*
* Return: true if the shadow is for a recently evicted folio; false otherwise.
*/
-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
/*
* Flush stats (and potentially sleep) outside the RCU read section.
+ *
+ * Note that workingset_test_recent() itself might be called in RCU read
+ * section (for e.g, in cachestat) - these callers need to skip flushing
+ * stats (via the flush argument).
+ *
* XXX: With per-memcg flushing and thresholding, is ratelimiting
* still needed here?
*/
- mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+ if (flush)
+ mem_cgroup_flush_stats_ratelimited(eviction_memcg);
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
- if (!workingset_test_recent(shadow, file, &workingset))
+ if (!workingset_test_recent(shadow, file, &workingset, true))
return;
folio_set_active(folio);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b2f391edb9e8..a40f733b37d7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -423,11 +423,12 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
+ n = 0;
skb_frag_foreach_page(frag,
skb_frag_off(frag) + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_local_page(p);
- n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
+ n += INDIRECT_CALL_1(cb, simple_copy_to_iter,
vaddr + p_off, p_len, data, to);
kunmap_local(vaddr);
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fd20aae30be2..bbf40b999713 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (copy)
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy) {
copied = copied ? copied : -EFAULT;
goto out;
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index b2de2108b356..34d76e87847d 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi(phydev);
mutex_unlock(&phydev->lock);
@@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi_max)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi_max(phydev);
mutex_unlock(&phydev->lock);
@@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
return ret;
};
+static bool linkstate_sqi_critical_error(int sqi)
+{
+ return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
+}
+
+static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
+{
+ return data->sqi >= 0 && data->sqi_max >= 0 &&
+ data->sqi <= data->sqi_max;
+}
+
static int linkstate_get_link_ext_state(struct net_device *dev,
struct linkstate_reply_data *data)
{
@@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
data->link = __ethtool_get_link(dev);
ret = linkstate_get_sqi(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ 0;
- if (data->sqi != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
-
- if (data->sqi_max != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
+ if (linkstate_sqi_valid(data)) {
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
+ }
if (data->link_ext_state_provided)
len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
@@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
return -EMSGSIZE;
- if (data->sqi != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
- return -EMSGSIZE;
+ if (linkstate_sqi_valid(data)) {
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+ return -EMSGSIZE;
- if (data->sqi_max != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
- return -EMSGSIZE;
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
+ data->sqi_max))
+ return -EMSGSIZE;
+ }
if (data->link_ext_state_provided) {
if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 47dacb575f74..e0f54b9be850 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2129,8 +2129,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
static inline void tcp_init_undo(struct tcp_sock *tp)
{
tp->undo_marker = tp->snd_una;
+
/* Retransmission still in flight may cause DSACKs later. */
- tp->undo_retrans = tp->retrans_out ? : -1;
+ /* First, account for regular retransmits in flight: */
+ tp->undo_retrans = tp->retrans_out;
+ /* Next, account for TLP retransmits in flight: */
+ if (tp->tlp_high_seq && tp->tlp_retrans)
+ tp->undo_retrans++;
+ /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
+ if (!tp->undo_retrans)
+ tp->undo_retrans = -1;
}
static bool tcp_is_rack(const struct sock *sk)
@@ -2209,6 +2217,7 @@ void tcp_enter_loss(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ab4b6de0e069..4d40615dc8fc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -479,15 +479,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
const struct sk_buff *skb,
u32 rtx_delta)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
const struct tcp_sock *tp = tcp_sk(sk);
- const int timeout = TCP_RTO_MAX * 2;
+ int timeout = TCP_RTO_MAX * 2;
s32 rcv_delta;
+ if (user_timeout) {
+ /* If user application specified a TCP_USER_TIMEOUT,
+ * it does not want win 0 packets to 'reset the timer'
+ * while retransmits are not making progress.
+ */
+ if (rtx_delta > user_timeout)
+ return true;
+ timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
+ }
/* Note: timer interrupt might have been delayed by at least one jiffy,
* and tp->rcv_tstamp might very well have been written recently.
* rcv_delta can thus be negative.
*/
- rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
@@ -532,8 +543,6 @@ void tcp_retransmit_timer(struct sock *sk)
if (WARN_ON_ONCE(!skb))
return;
- tp->tlp_high_seq = 0;
-
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ed97df6af14d..49c622e743e8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -326,6 +326,8 @@ found:
goto fail_unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -342,7 +344,7 @@ found:
hslot2->count++;
spin_unlock(&hslot2->lock);
}
- sock_set_flag(sk, SOCK_RCU_FREE);
+
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4a07834809bb..481ee78e77bc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3866,6 +3866,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+/** nft_chain_validate - loop detection and hook validation
+ *
+ * @ctx: context containing call depth and base chain
+ * @chain: chain to validate
+ *
+ * Walk through the rules of the given chain and chase all jumps/gotos
+ * and set lookups until either the jump limit is hit or all reachable
+ * chains have been validated.
+ */
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
@@ -3887,6 +3896,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (!expr->ops->validate)
continue;
+ /* This may call nft_chain_validate() recursively,
+ * callers that do so must increment ctx->level.
+ */
err = expr->ops->validate(ctx, expr, &data);
if (err < 0)
return err;
@@ -10879,150 +10891,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/*
- * Loop detection - walk through the ruleset beginning at the destination chain
- * of a new jump until either the source chain is reached (loop) or all
- * reachable chains have been traversed.
- *
- * The loop check is performed whenever a new jump verdict is added to an
- * expression or verdict map or a verdict map is bound to a new chain.
- */
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain);
-
-static int nft_check_loops(const struct nft_ctx *ctx,
- const struct nft_set_ext *ext)
-{
- const struct nft_data *data;
- int ret;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- ret = nf_tables_check_loops(ctx, data->verdict.chain);
- break;
- default:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_elem_priv *elem_priv)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
-
- if (!nft_set_elem_active(ext, iter->genmask))
- return 0;
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- return nft_check_loops(ctx, ext);
-}
-
-static int nft_set_catchall_loops(const struct nft_ctx *ctx,
- struct nft_set *set)
-{
- u8 genmask = nft_genmask_next(ctx->net);
- struct nft_set_elem_catchall *catchall;
- struct nft_set_ext *ext;
- int ret = 0;
-
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
- continue;
-
- ret = nft_check_loops(ctx, ext);
- if (ret < 0)
- return ret;
- }
-
- return ret;
-}
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain)
-{
- const struct nft_rule *rule;
- const struct nft_expr *expr, *last;
- struct nft_set *set;
- struct nft_set_binding *binding;
- struct nft_set_iter iter;
-
- if (ctx->chain == chain)
- return -ELOOP;
-
- if (fatal_signal_pending(current))
- return -EINTR;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_rule_for_each_expr(expr, last, rule) {
- struct nft_immediate_expr *priv;
- const struct nft_data *data;
- int err;
-
- if (strcmp(expr->ops->type->name, "immediate"))
- continue;
-
- priv = nft_expr_priv(expr);
- if (priv->dreg != NFT_REG_VERDICT)
- continue;
-
- data = &priv->data;
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- err = nf_tables_check_loops(ctx,
- data->verdict.chain);
- if (err < 0)
- return err;
- break;
- default:
- break;
- }
- }
- }
-
- list_for_each_entry(set, &ctx->table->sets, list) {
- if (!nft_is_active_next(ctx->net, set))
- continue;
- if (!(set->flags & NFT_SET_MAP) ||
- set->dtype != NFT_DATA_VERDICT)
- continue;
-
- list_for_each_entry(binding, &set->bindings, list) {
- if (!(binding->flags & NFT_SET_MAP) ||
- binding->chain != chain)
- continue;
-
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_loop_check_setelem;
-
- set->ops->walk(ctx, set, &iter);
- if (!iter.err)
- iter.err = nft_set_catchall_loops(ctx, set);
-
- if (iter.err < 0)
- return iter.err;
- }
- }
-
- return 0;
-}
-
/**
* nft_parse_u32_check - fetch u32 attribute and check for maximum value
*
@@ -11135,7 +11003,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
- err = nf_tables_check_loops(ctx, data->verdict.chain);
+ err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f1c31757e496..55e28e1da66e 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
hooks = nf_hook_entries_head(net, pf, entry->state.hook);
i = entry->hook_index;
- if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ if (!hooks || i >= hooks->num_hook_entries) {
kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
nf_queue_entry_free(entry);
return;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index a6b7c514a181..113b907da0f7 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1081,6 +1081,14 @@ do_nat:
err = nf_conntrack_confirm(skb);
if (err != NF_ACCEPT)
goto nf_error;
+
+ /* The ct may be dropped if a clash has been resolved,
+ * so it's necessary to retrieve it from skb again to
+ * prevent UAF.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ skip_add = true;
}
if (!skip_add)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index c2ef9dcf91d2..cc6051d4f2ef 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->block, sch, &q->block_info);
if (entry) {
- tcx_miniq_set_active(entry, false);
+ tcx_miniq_dec(entry);
if (!tcx_entry_is_active(entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(entry);
@@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, false, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, false);
@@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
if (ingress_entry) {
- tcx_miniq_set_active(ingress_entry, false);
+ tcx_miniq_dec(ingress_entry);
if (!tcx_entry_is_active(ingress_entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(ingress_entry);
@@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch)
}
if (egress_entry) {
- tcx_miniq_set_active(egress_entry, false);
+ tcx_miniq_dec(egress_entry);
if (!tcx_entry_is_active(egress_entry)) {
tcx_entry_update(dev, NULL, false);
tcx_entry_free(egress_entry);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index dfc353eea8ed..0e1691316f42 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2441,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
transport->srcport = 0;
status = -EAGAIN;
break;
+ case -EPERM:
+ /* Happens, for instance, if a BPF program is preventing
+ * the connect. Remap the error so upper layers can better
+ * deal with it.
+ */
+ status = -ECONNREFUSED;
+ fallthrough;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index abdd22007ed8..e4a79a9b2d58 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -427,8 +427,6 @@ static void __init remove_securityfs_measurement_lists(struct dentry **lists)
kfree(lists);
}
-
- securityfs_measurement_list_count = 0;
}
static int __init create_securityfs_measurement_lists(void)
@@ -625,6 +623,7 @@ out:
securityfs_remove(binary_runtime_measurements);
remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists);
remove_securityfs_measurement_lists(binary_securityfs_measurement_lists);
+ securityfs_measurement_list_count = 0;
securityfs_remove(ima_symlink);
securityfs_remove(ima_dir);
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 233f2b6edf52..49b79cf0c5cc 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str)
return result;
}
-static int comm_str__cmp(const void *_lhs, const void *_rhs)
-{
- const struct comm_str *lhs = *(const struct comm_str * const *)_lhs;
- const struct comm_str *rhs = *(const struct comm_str * const *)_rhs;
-
- return strcmp(comm_str__str(lhs), comm_str__str(rhs));
-}
-
static int comm_str__search(const void *_key, const void *_member)
{
const char *key = _key;
@@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str)
}
result = comm_str__new(str);
if (result) {
- comm_strs->strs[comm_strs->num_strs++] = result;
- qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *),
- comm_str__cmp);
+ int low = 0, high = comm_strs->num_strs - 1;
+ int insert = comm_strs->num_strs; /* Default to inserting at the end. */
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str);
+
+ if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ insert = mid;
+ }
+ }
+ memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert],
+ (comm_strs->num_strs - insert) * sizeof(struct comm_str *));
+ comm_strs->num_strs++;
+ comm_strs->strs[insert] = result;
}
}
up_write(&comm_strs->lock);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index ab3d0c01dd63..a69a9c661200 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso)
dsos->dsos = temp;
dsos->allocated = to_allocate;
}
- dsos->dsos[dsos->cnt++] = dso__get(dso);
- if (dsos->cnt >= 2 && dsos->sorted) {
- dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2],
- &dsos->dsos[dsos->cnt - 1])
- <= 0;
+ if (!dsos->sorted) {
+ dsos->dsos[dsos->cnt++] = dso__get(dso);
+ } else {
+ int low = 0, high = dsos->cnt - 1;
+ int insert = dsos->cnt; /* Default to inserting at the end. */
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso);
+
+ if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ insert = mid;
+ }
+ }
+ memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert],
+ (dsos->cnt - insert) * sizeof(struct dso *));
+ dsos->cnt++;
+ dsos->dsos[insert] = dso__get(dso);
}
dso__set_dsos(dso, dsos);
return 0;
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 5291e97df749..4ca84c8d9116 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -58,9 +58,12 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
+CONFIG_NET_ACT_SKBMOD=y
+CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_BPF=y
CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=y
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index bc9841144685..1af9ec1149aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -9,6 +9,8 @@
#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
#include "test_tc_link.skel.h"
+
+#include "netlink_helpers.h"
#include "tc_helpers.h"
void serial_test_tc_links_basic(void)
@@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void)
test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
}
+struct qdisc_req {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[1024];
+};
+
+static int qdisc_replace(int ifindex, const char *kind, bool block)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct qdisc_req req;
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_NEWQDISC;
+ req.t.tcm_family = AF_UNSPEC;
+ req.t.tcm_ifindex = ifindex;
+ req.t.tcm_parent = 0xfffffff1;
+
+ addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1);
+ if (block)
+ addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+void serial_test_tc_links_dev_chain0(void)
+{
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth");
+ ifindex = if_nametoindex("foo");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+ err = qdisc_replace(ifindex, "ingress", true);
+ if (!ASSERT_OK(err, "attaching ingress"))
+ goto cleanup;
+ ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block");
+ err = qdisc_replace(ifindex, "clsact", false);
+ if (!ASSERT_OK(err, "attaching clsact"))
+ goto cleanup;
+ /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s
+ * triggered the issue without the fix reliably 100% of the time.
+ */
+ sleep(5);
+ ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter");
+cleanup:
+ ASSERT_OK(system("ip link del dev foo"), "del veth");
+ ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed");
+ ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed");
+}
+
static void test_tc_links_dev_mixed(int target)
{
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
new file mode 100644
index 000000000000..871d16cb95cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1000,
+ );
+ int i, prog_fd = *(int *)arg;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+ &cpuset),
+ "cpu affinity");
+
+ for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ /* Skip the test if we can't reproduce the race in a reasonable
+ * amount of time.
+ */
+ if (i > 50) {
+ WRITE_ONCE(skip, true);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_lockup(void)
+{
+ int timer1_prog, timer2_prog;
+ struct timer_lockup *skel;
+ pthread_t thrds[2];
+ void *ret;
+
+ skel = timer_lockup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+ return;
+
+ timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+ timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+ timer1_err = &skel->bss->timer1_err;
+ timer2_err = &skel->bss->timer2_err;
+
+ if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+ &timer1_prog),
+ "pthread_create thread1"))
+ goto out;
+ if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+ &timer2_prog),
+ "pthread_create thread2")) {
+ pthread_exit(&thrds[0]);
+ goto out;
+ }
+
+ pthread_join(thrds[1], &ret);
+ pthread_join(thrds[0], &ret);
+
+ if (skip) {
+ test__skip();
+ goto out;
+ }
+
+ if (*timer1_err != -EDEADLK && *timer1_err != 0)
+ ASSERT_FAIL("timer1_err bad value");
+ if (*timer2_err != -EDEADLK && *timer2_err != 0)
+ ASSERT_FAIL("timer2_err bad value");
+out:
+ timer_lockup__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c
new file mode 100644
index 000000000000..3e520133281e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_lockup.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer)
+ timer2_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer)
+ timer1_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb1);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb2);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk
index b909bee3cb2a..abb9e58d95c4 100644
--- a/tools/testing/selftests/powerpc/flags.mk
+++ b/tools/testing/selftests/powerpc/flags.mk
@@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown")
export GIT_VERSION
endif
-ifeq ($(CFLAGS),)
-CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS)
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS)
export CFLAGS
-endif
-
diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
index 62397d5934f1..ed351a1cb917 100644
--- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c
+++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
@@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *))
asm(".option push \n\
.option arch, +v \n\
- vsetivli x0, 1, e32, ta, ma \n\
+ vsetivli x0, 1, e32, m1, ta, ma \n\
vmv.s.x v0, %1 \n\
# Generate SIGSEGV \n\
lw a0, 0(x0) \n\
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index e40dc5be2f66..d12ff955de0d 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -30,7 +30,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
}
return 0;
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec, tst.tv_sec);
}
@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
/* Check that a child process is in the new timens. */
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
+ if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec + OFFSET, tst.tv_sec);
}
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 5e7f0051bd7b..5b939f59dfa4 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime");
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index 9edd43d6b2c1..a4196bbd6e33 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime(%d)", clockid);
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
index beb7614941fb..5b8907bf451d 100644
--- a/tools/testing/selftests/timens/vfork_exec.c
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -32,7 +32,7 @@ static void *tcheck(void *_args)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5) {
+ if (labs(tst.tv_sec - now->tv_sec) > 5) {
pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n",
args->tst_name, tst.tv_sec, now->tv_sec);
return (void *)1UL;
@@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5)
+ if (labs(tst.tv_sec - now->tv_sec) > 5)
return pr_fail("%s: unexpected value: %ld (%ld)\n",
tst_name, tst.tv_sec, now->tv_sec);
}
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index d53a4d8008f9..98d8ba2afa00 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,35 +1,30 @@
# SPDX-License-Identifier: GPL-2.0
-include ../lib.mk
-
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
+TEST_GEN_PROGS := vdso_test_gettimeofday
+TEST_GEN_PROGS += vdso_test_getcpu
+TEST_GEN_PROGS += vdso_test_abi
+TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
-TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+TEST_GEN_PROGS += vdso_standalone_test_x86
endif
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
+TEST_GEN_PROGS += vdso_test_correctness
CFLAGS := -std=gnu99
-CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
-LDFLAGS_vdso_test_correctness := -ldl
+
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
+
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
- $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
- vdso_standalone_test_x86.c parse_vdso.c \
- -o $@
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
- $(CC) $(CFLAGS) \
- vdso_test_correctness.c \
- -o $@ \
- $(LDFLAGS_vdso_test_correctness)
+$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 413f75620a35..4ae417372e9e 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -55,14 +55,20 @@ static struct vdso_info
ELF(Verdef) *verdef;
} vdso_info;
-/* Straight from the ELF specification. */
-static unsigned long elf_hash(const unsigned char *name)
+/*
+ * Straight from the ELF specification...and then tweaked slightly, in order to
+ * avoid a few clang warnings.
+ */
+static unsigned long elf_hash(const char *name)
{
unsigned long h = 0, g;
- while (*name)
+ const unsigned char *uch_name = (const unsigned char *)name;
+
+ while (*uch_name)
{
- h = (h << 4) + *name++;
- if (g = h & 0xf0000000)
+ h = (h << 4) + *uch_name++;
+ g = h & 0xf0000000;
+ if (g)
h ^= g >> 24;
h &= ~g;
}
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 8a44ff973ee1..27f6fdf11969 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -18,7 +18,7 @@
#include "parse_vdso.h"
-/* We need a libc functions... */
+/* We need some libc functions... */
int strcmp(const char *a, const char *b)
{
/* This implementation is buggy: it never returns -1. */
@@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b)
return 0;
}
+/*
+ * The clang build needs this, although gcc does not.
+ * Stolen from lib/string.c.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
/* ...and two syscalls. This is x86-specific. */
static inline long x86_syscall3(long nr, long a0, long a1, long a2)
{
@@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n)
}
}
-__attribute__((externally_visible)) void c_main(void **stack)
+void c_main(void **stack)
{
/* Parse the stack */
long argc = (long)*stack;
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index e95bd56b332f..35856b11c143 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu max -machine microvm -no-acpi
+QEMU_MACHINE := -cpu max -machine microvm,acpi=off
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
@@ -120,9 +120,9 @@ KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
+QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl