diff options
Diffstat (limited to 'arch/riscv')
114 files changed, 2298 insertions, 733 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d4a7ca0388c0..10116f68569d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -24,6 +24,9 @@ config RISCV select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CRC32 if RISCV_ISA_ZBC + select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC + select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE @@ -52,7 +55,7 @@ config RISCV select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN - select ARCH_HAS_VDSO_TIME_DATA + select ARCH_HAS_VDSO_ARCH_DATA if GENERIC_VDSO_DATA_STORE select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if 64BIT && MMU select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX @@ -110,11 +113,13 @@ config RISCV select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED + select GENERIC_PENDING_IRQ if SMP select GENERIC_PCI_IOMAP select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select GENERIC_VDSO_DATA_STORE if MMU select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU @@ -146,9 +151,10 @@ config RISCV select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU select HAVE_GUP_FAST if MMU diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 2acc7d876e1f..e318119d570d 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -119,4 +119,15 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_GHOSTWRITE + bool "Apply T-Head Ghostwrite errata" + depends on ERRATA_THEAD && RISCV_ISA_XTHEADVECTOR + default y + help + The T-Head C9xx cores have a vulnerability in the xtheadvector + instruction set. When this errata is enabled, the CPUs will be probed + to determine if they are vulnerable and disable xtheadvector. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index f51bb24bc84c..17606940bb52 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -24,6 +24,12 @@ config ARCH_SOPHGO help This enables support for Sophgo SoC platform hardware. +config ARCH_SPACEMIT + bool "SpacemiT SoCs" + select PINCTRL + help + This enables support for SpacemiT SoC platform hardware. + config ARCH_STARFIVE def_bool SOC_STARFIVE diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor index 6f1cdd32ed29..b096548fe0ff 100644 --- a/arch/riscv/Kconfig.vendor +++ b/arch/riscv/Kconfig.vendor @@ -16,4 +16,30 @@ config RISCV_ISA_VENDOR_EXT_ANDES If you don't know what to do here, say Y. endmenu +menu "T-Head" +config RISCV_ISA_VENDOR_EXT_THEAD + bool "T-Head vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here to disable detection of and support for all T-Head vendor + extensions. Without this option enabled, T-Head vendor extensions will + not be detected at boot and their presence not reported to userspace. + + If you don't know what to do here, say Y. + +config RISCV_ISA_XTHEADVECTOR + bool "xtheadvector extension support" + depends on RISCV_ISA_VENDOR_EXT_THEAD + depends on RISCV_ISA_V + depends on FPU + default y + help + Say N here if you want to disable all xtheadvector related procedures + in the kernel. This will disable vector for any T-Head board that + contains xtheadvector rather than the standard vector. + + If you don't know what to do here, say Y. +endmenu + endmenu diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink index 829b9abc91f6..6b0580949b6a 100644 --- a/arch/riscv/Makefile.postlink +++ b/arch/riscv/Makefile.postlink @@ -10,6 +10,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib quiet_cmd_relocs_check = CHKREL $@ cmd_relocs_check = \ @@ -19,11 +20,6 @@ ifdef CONFIG_RELOCATABLE quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs -quiet_cmd_relocs_strip = STRIPREL $@ -cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \ - --remove-section='.rel__*' \ - --remove-section='.rela.*' \ - --remove-section='.rela__*' $@ endif # `@true` prevents complaint when there is nothing to be done @@ -33,7 +29,7 @@ vmlinux: FORCE ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) $(call if_changed,cp_vmlinux_relocs) - $(call if_changed,relocs_strip) + $(call if_changed,strip_relocs) endif clean: diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index fdae05bbf556..bff887d38abe 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -5,6 +5,7 @@ subdir-y += microchip subdir-y += renesas subdir-y += sifive subdir-y += sophgo +subdir-y += spacemit subdir-y += starfive subdir-y += thead diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi index 64c3c2e6cbe0..6367112e614a 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi @@ -27,7 +27,8 @@ riscv,isa = "rv64imafdc"; riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", - "zifencei", "zihpm"; + "zifencei", "zihpm", "xtheadvector"; + thead,vlenb = <128>; #cooling-cells = <2>; cpu0_intc: interrupt-controller { diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi index 1069134f2e12..a6dda55a2d1d 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi @@ -32,8 +32,9 @@ #interrupt-cells = <0x1>; #size-cells = <0x2>; device_type = "pci"; - reg = <0x30 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; + reg = <0x30 0x0 0x0 0x8000000>, <0x0 0x43008000 0x0 0x2000>, + <0x0 0x4300a000 0x0 0x2000>; + reg-names = "cfg", "bridge", "ctrl"; bus-range = <0x0 0x7f>; interrupt-parent = <&plic>; interrupts = <119>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi index 8230f06ddf48..36a9860f31da 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi @@ -20,8 +20,9 @@ #interrupt-cells = <0x1>; #size-cells = <0x2>; device_type = "pci"; - reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43008000 0x0 0x2000>, + <0x0 0x4300a000 0x0 0x2000>; + reg-names = "cfg", "bridge", "ctrl"; bus-range = <0x0 0x7f>; interrupt-parent = <&plic>; interrupts = <119>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi index 9a56de7b91d6..a57dca891965 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi @@ -20,8 +20,9 @@ #interrupt-cells = <0x1>; #size-cells = <0x2>; device_type = "pci"; - reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43008000 0x0 0x2000>, + <0x0 0x4300a000 0x0 0x2000>; + reg-names = "cfg", "bridge", "ctrl"; bus-range = <0x0 0x7f>; interrupt-parent = <&plic>; interrupts = <119>; diff --git a/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts b/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts index be596d01ff8d..34645a5f6038 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts +++ b/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts @@ -73,6 +73,13 @@ }; / { + pwmfan: pwm-fan { + compatible = "pwm-fan"; + cooling-levels = <103 128 179 230 255>; + pwms = <&pwm 0 40000 0>; + #cooling-cells = <2>; + }; + thermal-zones { soc-thermal { polling-delay-passive = <1000>; @@ -104,6 +111,28 @@ type = "hot"; }; }; + + cooling-maps { + map0 { + trip = <&soc_active1>; + cooling-device = <&pwmfan 0 1>; + }; + + map1 { + trip = <&soc_active2>; + cooling-device = <&pwmfan 1 2>; + }; + + map2 { + trip = <&soc_active3>; + cooling-device = <&pwmfan 2 3>; + }; + + map3 { + trip = <&soc_hot>; + cooling-device = <&pwmfan 3 4>; + }; + }; }; board-thermal { @@ -118,6 +147,13 @@ type = "active"; }; }; + + cooling-maps { + map4 { + trip = <&board_active>; + cooling-device = <&pwmfan 3 4>; + }; + }; }; }; }; diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index e62ac51ac55a..aa8b7fcc125d 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -165,6 +165,15 @@ }; }; + pwm: pwm@703000c000 { + compatible = "sophgo,sg2042-pwm"; + reg = <0x70 0x3000c000 0x0 0x20>; + #pwm-cells = <3>; + clocks = <&clkgen GATE_CLK_APB_PWM>; + clock-names = "apb"; + resets = <&rstgen RST_PWM>; + }; + pllclk: clock-controller@70300100c0 { compatible = "sophgo,sg2042-pll"; reg = <0x70 0x300100c0 0x0 0x40>; @@ -173,6 +182,16 @@ #clock-cells = <1>; }; + msi: msi-controller@7030010304 { + compatible = "sophgo,sg2042-msi"; + reg = <0x70 0x30010304 0x0 0x4>, + <0x70 0x30010300 0x0 0x4>; + reg-names = "clr", "doorbell"; + msi-controller; + #msi-cells = <0>; + msi-ranges = <&intc 64 IRQ_TYPE_LEVEL_HIGH 32>; + }; + rpgate: clock-controller@7030010368 { compatible = "sophgo,sg2042-rpgate"; reg = <0x70 0x30010368 0x0 0x98>; diff --git a/arch/riscv/boot/dts/spacemit/Makefile b/arch/riscv/boot/dts/spacemit/Makefile new file mode 100644 index 000000000000..92e13ce1c16d --- /dev/null +++ b/arch/riscv/boot/dts/spacemit/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_ARCH_SPACEMIT) += k1-bananapi-f3.dtb +dtb-$(CONFIG_ARCH_SPACEMIT) += k1-milkv-jupiter.dtb diff --git a/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts b/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts new file mode 100644 index 000000000000..1d617b40a2d5 --- /dev/null +++ b/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2024 Yangyu Chen <cyy@cyyself.name> + */ + +#include "k1.dtsi" +#include "k1-pinctrl.dtsi" + +/ { + model = "Banana Pi BPI-F3"; + compatible = "bananapi,bpi-f3", "spacemit,k1"; + + aliases { + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0"; + }; +}; + +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_2_cfg>; + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/spacemit/k1-milkv-jupiter.dts b/arch/riscv/boot/dts/spacemit/k1-milkv-jupiter.dts new file mode 100644 index 000000000000..448319214104 --- /dev/null +++ b/arch/riscv/boot/dts/spacemit/k1-milkv-jupiter.dts @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2024 Yangyu Chen <cyy@cyyself.name> + * Copyright (C) 2025 Javier Martinez Canillas <javierm@redhat.com> + */ + +#include "k1.dtsi" +#include "k1-pinctrl.dtsi" + +/ { + model = "Milk-V Jupiter (K1)"; + compatible = "milkv,jupiter", "spacemit,k1"; + + aliases { + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0"; + }; +}; + +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_2_cfg>; + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/spacemit/k1-pinctrl.dtsi b/arch/riscv/boot/dts/spacemit/k1-pinctrl.dtsi new file mode 100644 index 000000000000..a8eac5517f85 --- /dev/null +++ b/arch/riscv/boot/dts/spacemit/k1-pinctrl.dtsi @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (c) 2024 Yixun Lan <dlan@gentoo.org> + */ + +#include <dt-bindings/gpio/gpio.h> + +#define K1_PADCONF(pin, func) (((pin) << 16) | (func)) + +&pinctrl { + uart0_2_cfg: uart0-2-cfg { + uart0-2-pins { + pinmux = <K1_PADCONF(68, 2)>, + <K1_PADCONF(69, 2)>; + + bias-pull-up = <0>; + drive-strength = <32>; + }; + }; +}; diff --git a/arch/riscv/boot/dts/spacemit/k1.dtsi b/arch/riscv/boot/dts/spacemit/k1.dtsi new file mode 100644 index 000000000000..c670ebf8fa12 --- /dev/null +++ b/arch/riscv/boot/dts/spacemit/k1.dtsi @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2024 Yangyu Chen <cyy@cyyself.name> + */ + +/dts-v1/; +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "SpacemiT K1"; + compatible = "spacemit,k1"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + timebase-frequency = <24000000>; + + cpu-map { + cluster0 { + core0 { + cpu = <&cpu_0>; + }; + core1 { + cpu = <&cpu_1>; + }; + core2 { + cpu = <&cpu_2>; + }; + core3 { + cpu = <&cpu_3>; + }; + }; + + cluster1 { + core0 { + cpu = <&cpu_4>; + }; + core1 { + cpu = <&cpu_5>; + }; + core2 { + cpu = <&cpu_6>; + }; + core3 { + cpu = <&cpu_7>; + }; + }; + }; + + cpu_0: cpu@0 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <0>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster0_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu0_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_1: cpu@1 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <1>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster0_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu1_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_2: cpu@2 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <2>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster0_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu2_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_3: cpu@3 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <3>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster0_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu3_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_4: cpu@4 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <4>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster1_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu4_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_5: cpu@5 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <5>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster1_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu5_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_6: cpu@6 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <6>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster1_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu6_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu_7: cpu@7 { + compatible = "spacemit,x60", "riscv"; + device_type = "cpu"; + reg = <7>; + riscv,isa = "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt"; + riscv,isa-base = "rv64i"; + riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "v", "zicbom", + "zicbop", "zicboz", "zicntr", "zicond", "zicsr", + "zifencei", "zihintpause", "zihpm", "zfh", "zba", + "zbb", "zbc", "zbs", "zkt", "zvfh", "zvkt", + "sscofpmf", "sstc", "svinval", "svnapot", "svpbmt"; + riscv,cbom-block-size = <64>; + riscv,cbop-block-size = <64>; + riscv,cboz-block-size = <64>; + i-cache-block-size = <64>; + i-cache-size = <32768>; + i-cache-sets = <128>; + d-cache-block-size = <64>; + d-cache-size = <32768>; + d-cache-sets = <128>; + next-level-cache = <&cluster1_l2_cache>; + mmu-type = "riscv,sv39"; + + cpu7_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cluster0_l2_cache: l2-cache0 { + compatible = "cache"; + cache-block-size = <64>; + cache-level = <2>; + cache-size = <524288>; + cache-sets = <512>; + cache-unified; + }; + + cluster1_l2_cache: l2-cache1 { + compatible = "cache"; + cache-block-size = <64>; + cache-level = <2>; + cache-size = <524288>; + cache-sets = <512>; + cache-unified; + }; + }; + + soc { + compatible = "simple-bus"; + interrupt-parent = <&plic>; + #address-cells = <2>; + #size-cells = <2>; + dma-noncoherent; + ranges; + + uart0: serial@d4017000 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017000 0x0 0x100>; + interrupts = <42>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart2: serial@d4017100 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017100 0x0 0x100>; + interrupts = <44>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart3: serial@d4017200 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017200 0x0 0x100>; + interrupts = <45>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart4: serial@d4017300 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017300 0x0 0x100>; + interrupts = <46>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart5: serial@d4017400 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017400 0x0 0x100>; + interrupts = <47>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart6: serial@d4017500 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017500 0x0 0x100>; + interrupts = <48>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart7: serial@d4017600 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017600 0x0 0x100>; + interrupts = <49>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart8: serial@d4017700 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017700 0x0 0x100>; + interrupts = <50>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + uart9: serial@d4017800 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xd4017800 0x0 0x100>; + interrupts = <51>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "disabled"; + }; + + pinctrl: pinctrl@d401e000 { + compatible = "spacemit,k1-pinctrl"; + reg = <0x0 0xd401e000 0x0 0x400>; + }; + + plic: interrupt-controller@e0000000 { + compatible = "spacemit,k1-plic", "sifive,plic-1.0.0"; + reg = <0x0 0xe0000000 0x0 0x4000000>; + interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>, + <&cpu1_intc 11>, <&cpu1_intc 9>, + <&cpu2_intc 11>, <&cpu2_intc 9>, + <&cpu3_intc 11>, <&cpu3_intc 9>, + <&cpu4_intc 11>, <&cpu4_intc 9>, + <&cpu5_intc 11>, <&cpu5_intc 9>, + <&cpu6_intc 11>, <&cpu6_intc 9>, + <&cpu7_intc 11>, <&cpu7_intc 9>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + riscv,ndev = <159>; + }; + + clint: timer@e4000000 { + compatible = "spacemit,k1-clint", "sifive,clint0"; + reg = <0x0 0xe4000000 0x0 0x10000>; + interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>, + <&cpu1_intc 3>, <&cpu1_intc 7>, + <&cpu2_intc 3>, <&cpu2_intc 7>, + <&cpu3_intc 3>, <&cpu3_intc 7>, + <&cpu4_intc 3>, <&cpu4_intc 7>, + <&cpu5_intc 3>, <&cpu5_intc 7>, + <&cpu6_intc 3>, <&cpu6_intc 7>, + <&cpu7_intc 3>, <&cpu7_intc 7>; + }; + + sec_uart1: serial@f0612000 { + compatible = "spacemit,k1-uart", "intel,xscale-uart"; + reg = <0x0 0xf0612000 0x0 0x100>; + interrupts = <43>; + clock-frequency = <14857000>; + reg-shift = <2>; + reg-io-width = <4>; + status = "reserved"; /* for TEE usage */ + }; + }; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index 48fb5091b817..c2f70f5e2918 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -233,7 +233,7 @@ regulator-always-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1540000>; - regulator-name = "vdd-cpu"; + regulator-name = "vdd_cpu"; }; emmc_vdd: aldo4 { @@ -350,12 +350,6 @@ &spi0 { pinctrl-names = "default"; pinctrl-0 = <&spi0_pins>; - - spi_dev0: spi@0 { - compatible = "rohm,dh2228fv"; - reg = <0>; - spi-max-frequency = <10000000>; - }; }; &syscrg { diff --git a/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts b/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts index 30b0715196b6..8d9ce8b69a71 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts @@ -11,6 +11,40 @@ compatible = "deepcomputing,fml13v01", "starfive,jh7110"; }; +&pcie1 { + perst-gpios = <&sysgpio 21 GPIO_ACTIVE_LOW>; + phys = <&pciephy1>; + pinctrl-names = "default"; + pinctrl-0 = <&pcie1_pins>; + status = "okay"; +}; + +&sysgpio { + pcie1_pins: pcie1-0 { + clkreq-pins { + pinmux = <GPIOMUX(29, GPOUT_LOW, + GPOEN_DISABLE, + GPI_NONE)>; + bias-pull-down; + drive-strength = <2>; + input-enable; + input-schmitt-disable; + slew-rate = <0>; + }; + + wake-pins { + pinmux = <GPIOMUX(28, GPOUT_HIGH, + GPOEN_DISABLE, + GPI_NONE)>; + bias-pull-up; + drive-strength = <2>; + input-enable; + input-schmitt-disable; + slew-rate = <0>; + }; + }; +}; + &usb0 { dr_mode = "host"; status = "okay"; diff --git a/arch/riscv/boot/dts/starfive/jh7110-milkv-mars.dts b/arch/riscv/boot/dts/starfive/jh7110-milkv-mars.dts index 0d248b671d4b..3bd62ab78523 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-milkv-mars.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-milkv-mars.dts @@ -53,7 +53,23 @@ status = "okay"; }; +&sysgpio { + usb0_pins: usb0-0 { + vbus-pins { + pinmux = <GPIOMUX(25, GPOUT_SYS_USB_DRIVE_VBUS, + GPOEN_ENABLE, + GPI_NONE)>; + bias-disable; + input-disable; + input-schmitt-disable; + slew-rate = <0>; + }; + }; +}; + &usb0 { - dr_mode = "peripheral"; + dr_mode = "host"; + pinctrl-names = "default"; + pinctrl-0 = <&usb0_pins>; status = "okay"; }; diff --git a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts index fe4a490ecc61..31e825be2065 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts @@ -80,7 +80,28 @@ status = "okay"; }; +&sysgpio { + usb0_pins: usb0-0 { + vbus-pins { + pinmux = <GPIOMUX(25, GPOUT_SYS_USB_DRIVE_VBUS, + GPOEN_ENABLE, + GPI_NONE)>; + bias-disable; + input-disable; + input-schmitt-disable; + slew-rate = <0>; + }; + }; +}; + &usb0 { - dr_mode = "peripheral"; + dr_mode = "host"; + pinctrl-names = "default"; + pinctrl-0 = <&usb0_pins>; status = "okay"; }; + +&usb_cdns3 { + phys = <&usbphy0>, <&pciephy0>; + phy-names = "cdns3,usb2-phy", "cdns3,usb3-phy"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h b/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h index 256de17f5261..ae49c908e7fb 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h +++ b/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h @@ -89,7 +89,7 @@ #define GPOUT_SYS_SDIO1_DATA1 59 #define GPOUT_SYS_SDIO1_DATA2 60 #define GPOUT_SYS_SDIO1_DATA3 61 -#define GPOUT_SYS_SDIO1_DATA4 63 +#define GPOUT_SYS_SDIO1_DATA4 62 #define GPOUT_SYS_SDIO1_DATA5 63 #define GPOUT_SYS_SDIO1_DATA6 64 #define GPOUT_SYS_SDIO1_DATA7 65 diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi index 0d8339357bad..0ba74ef04679 100644 --- a/arch/riscv/boot/dts/starfive/jh7110.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -611,6 +611,8 @@ pciephy0: phy@10210000 { compatible = "starfive,jh7110-pcie-phy"; reg = <0x0 0x10210000 0x0 0x10000>; + starfive,sys-syscon = <&sys_syscon 0x18>; + starfive,stg-syscon = <&stg_syscon 0x148 0x1f4>; #phy-cells = <0>; }; @@ -1022,7 +1024,6 @@ snps,force_thresh_dma_mode; snps,axi-config = <&stmmac_axi_setup>; snps,tso; - snps,en-tx-lpi-clockgating; snps,txpbl = <16>; snps,rxpbl = <16>; starfive,syscon = <&aon_syscon 0xc 0x12>; @@ -1053,7 +1054,6 @@ snps,force_thresh_dma_mode; snps,axi-config = <&stmmac_axi_setup>; snps,tso; - snps,en-tx-lpi-clockgating; snps,txpbl = <16>; snps,rxpbl = <16>; starfive,syscon = <&sys_syscon 0x90 0x2>; diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi index acfe030e803a..527336417765 100644 --- a/arch/riscv/boot/dts/thead/th1520.dtsi +++ b/arch/riscv/boot/dts/thead/th1520.dtsi @@ -599,6 +599,22 @@ status = "disabled"; }; + mbox_910t: mailbox@ffffc38000 { + compatible = "thead,th1520-mbox"; + reg = <0xff 0xffc38000 0x0 0x6000>, + <0xff 0xffc40000 0x0 0x6000>, + <0xff 0xffc4c000 0x0 0x2000>, + <0xff 0xffc54000 0x0 0x2000>; + reg-names = "local", "remote-icu0", "remote-icu1", "remote-icu2"; + clocks = <&clk CLK_MBOX0>, <&clk CLK_MBOX1>, <&clk CLK_MBOX2>, + <&clk CLK_MBOX3>; + clock-names = "clk-local", "clk-remote-icu0", "clk-remote-icu1", + "clk-remote-icu2"; + interrupt-parent = <&plic>; + interrupts = <28 IRQ_TYPE_LEVEL_HIGH>; + #mbox-cells = <1>; + }; + gpio@fffff41000 { compatible = "snps,dw-apb-gpio"; reg = <0xff 0xfff41000 0x0 0x1000>; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index b4a37345703e..0f7dcbe3c45b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -10,7 +10,6 @@ CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y @@ -30,6 +29,7 @@ CONFIG_ARCH_MICROCHIP=y CONFIG_ARCH_RENESAS=y CONFIG_ARCH_SIFIVE=y CONFIG_ARCH_SOPHGO=y +CONFIG_ARCH_SPACEMIT=y CONFIG_SOC_STARFIVE=y CONFIG_ARCH_SUNXI=y CONFIG_ARCH_THEAD=y @@ -167,6 +167,7 @@ CONFIG_PINCTRL_SOPHGO_CV1800B=y CONFIG_PINCTRL_SOPHGO_CV1812H=y CONFIG_PINCTRL_SOPHGO_SG2000=y CONFIG_PINCTRL_SOPHGO_SG2002=y +CONFIG_PINCTRL_TH1520=y CONFIG_GPIO_DWAPB=y CONFIG_GPIO_SIFIVE=y CONFIG_POWER_RESET_GPIO_RESTART=y @@ -242,6 +243,7 @@ CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m CONFIG_DW_AXI_DMAC=y +CONFIG_DWMAC_THEAD=m CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index e24770a77932..0b942183f708 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -10,6 +10,7 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/dma-noncoherent.h> @@ -142,6 +143,31 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_ghostwrite(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_GHOSTWRITE)) + return false; + + /* + * target-c9xx cores report arch_id and impid as 0 + * + * While ghostwrite may not affect all c9xx cores that implement + * xtheadvector, there is no futher granularity than c9xx. Assume + * vulnerable for this entire class of processors when xtheadvector is + * enabled. + */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + ghostwrite_set_vulnerable(); + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -155,6 +181,8 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + errata_probe_ghostwrite(stage, archid, impid); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index de13d5a234f8..bd5fc9403295 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -4,6 +4,7 @@ syscall-y += syscall_table_64.h generic-y += early_ioremap.h generic-y += flat.h +generic-y += fprobe.h generic-y += kvm_para.h generic-y += mmzone.h generic-y += mcs_spinlock.h diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index fae152ea0508..49a0f48d93df 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -226,9 +226,9 @@ legacy: * @nr: Bit to set * @addr: Address to count from * - * This operation may be reordered on other architectures than x86. + * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr) +static __always_inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr) { return __test_and_op_bit(or, __NOP, nr, addr); } @@ -238,9 +238,9 @@ static inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr) * @nr: Bit to clear * @addr: Address to count from * - * This operation can be reordered on other architectures other than x86. + * This is an atomic fully-ordered operation (implied full memory barrier). */ -static inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr) +static __always_inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr) { return __test_and_op_bit(and, __NOT, nr, addr); } @@ -253,7 +253,7 @@ static inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int arch_test_and_change_bit(int nr, volatile unsigned long *addr) +static __always_inline int arch_test_and_change_bit(int nr, volatile unsigned long *addr) { return __test_and_op_bit(xor, __NOP, nr, addr); } @@ -270,7 +270,7 @@ static inline int arch_test_and_change_bit(int nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void arch_set_bit(int nr, volatile unsigned long *addr) +static __always_inline void arch_set_bit(int nr, volatile unsigned long *addr) { __op_bit(or, __NOP, nr, addr); } @@ -284,7 +284,7 @@ static inline void arch_set_bit(int nr, volatile unsigned long *addr) * on non x86 architectures, so if you are writing portable code, * make sure not to rely on its reordering guarantees. */ -static inline void arch_clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void arch_clear_bit(int nr, volatile unsigned long *addr) { __op_bit(and, __NOT, nr, addr); } @@ -298,7 +298,7 @@ static inline void arch_clear_bit(int nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void arch_change_bit(int nr, volatile unsigned long *addr) +static __always_inline void arch_change_bit(int nr, volatile unsigned long *addr) { __op_bit(xor, __NOP, nr, addr); } @@ -311,7 +311,7 @@ static inline void arch_change_bit(int nr, volatile unsigned long *addr) * This operation is atomic and provides acquire barrier semantics. * It can be used to implement bit locks. */ -static inline int arch_test_and_set_bit_lock( +static __always_inline int arch_test_and_set_bit_lock( unsigned long nr, volatile unsigned long *addr) { return __test_and_op_bit_ord(or, __NOP, nr, addr, .aq); @@ -324,7 +324,7 @@ static inline int arch_test_and_set_bit_lock( * * This operation is atomic and provides release barrier semantics. */ -static inline void arch_clear_bit_unlock( +static __always_inline void arch_clear_bit_unlock( unsigned long nr, volatile unsigned long *addr) { __op_bit_ord(and, __NOT, nr, addr, .rl); @@ -345,13 +345,13 @@ static inline void arch_clear_bit_unlock( * non-atomic property here: it's a lot more instructions and we still have to * provide release semantics anyway. */ -static inline void arch___clear_bit_unlock( +static __always_inline void arch___clear_bit_unlock( unsigned long nr, volatile unsigned long *addr) { arch_clear_bit_unlock(nr, addr); } -static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, +static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, volatile unsigned long *addr) { unsigned long res; diff --git a/arch/riscv/include/asm/bugs.h b/arch/riscv/include/asm/bugs.h new file mode 100644 index 000000000000..17ca0a947730 --- /dev/null +++ b/arch/riscv/include/asm/bugs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for riscv vulnerabilities. + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef __ASM_BUGS_H +#define __ASM_BUGS_H + +/* Watch out, ordering is important here. */ +enum mitigation_state { + UNAFFECTED, + MITIGATED, + VULNERABLE, +}; + +void ghostwrite_set_vulnerable(void); +bool ghostwrite_enable_mitigation(void); +enum mitigation_state ghostwrite_get_state(void); + +#endif /* __ASM_BUGS_H */ diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 4cadc56220fe..427c41dde643 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -231,7 +231,7 @@ __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ sc_prepend, sc_append, \ cas_prepend, cas_append, \ - __ret, __ptr, (long), __old, __new); \ + __ret, __ptr, (long)(int)(long), __old, __new); \ break; \ case 8: \ __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 4bd054c54c21..569140d6e639 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -34,6 +34,8 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; +extern u32 thead_vlenb_of; + void __init riscv_user_isa_enable(void); #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 37bdea65bbd8..6fed42e37705 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -30,6 +30,12 @@ #define SR_VS_CLEAN _AC(0x00000400, UL) #define SR_VS_DIRTY _AC(0x00000600, UL) +#define SR_VS_THEAD _AC(0x01800000, UL) /* xtheadvector Status */ +#define SR_VS_OFF_THEAD _AC(0x00000000, UL) +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) + #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) #define SR_XS_INITIAL _AC(0x00008000, UL) @@ -315,6 +321,15 @@ #define CSR_STIMECMP 0x14D #define CSR_STIMECMPH 0x15D +/* xtheadvector symbolic CSR names */ +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa + +/* xtheadvector CSR masks */ +#define CSR_VXRM_MASK 3 +#define CSR_VXRM_SHIFT 1 +#define CSR_VXSAT_MASK 1 + /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */ #define CSR_SISELECT 0x150 #define CSR_SIREG 0x151 diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 7c8a71a526a3..6e426ed7919a 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -25,7 +25,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_GHOSTWRITE 2 +#define ERRATA_THEAD_NUMBER 3 #endif #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 3d66437a1029..c4721ce44ca4 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -168,6 +168,11 @@ static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct return arch_ftrace_regs(fregs)->sp; } +static __always_inline unsigned long ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->s0; +} + static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) { @@ -181,6 +186,11 @@ static __always_inline unsigned long ftrace_regs_get_return_value(const struct f return arch_ftrace_regs(fregs)->a0; } +static __always_inline unsigned long ftrace_regs_get_return_address(const struct ftrace_regs *fregs) +{ + return arch_ftrace_regs(fregs)->ra; +} + static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, unsigned long ret) { @@ -192,6 +202,20 @@ static __always_inline void ftrace_override_function_with_return(struct ftrace_r arch_ftrace_regs(fregs)->epc = arch_ftrace_regs(fregs)->ra; } +static __always_inline struct pt_regs * +ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) +{ + struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs); + + memcpy(®s->a0, afregs->args, sizeof(afregs->args)); + regs->epc = afregs->epc; + regs->ra = afregs->ra; + regs->sp = afregs->sp; + regs->s0 = afregs->s0; + regs->t1 = afregs->t1; + return regs; +} + int ftrace_regs_query_register_offset(const char *name); void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, @@ -208,25 +232,4 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsi #endif /* CONFIG_DYNAMIC_FTRACE */ -#ifndef __ASSEMBLY__ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -struct fgraph_ret_regs { - unsigned long a1; - unsigned long a0; - unsigned long s0; - unsigned long ra; -}; - -static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) -{ - return ret_regs->a0; -} - -static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) -{ - return ret_regs->s0; -} -#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */ -#endif - #endif /* _ASM_RISCV_FTRACE_H */ diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index fc8130f995c1..90c86b115e00 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -85,7 +85,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __enable_user_access(); __asm__ __volatile__ ( - "1: lr.w.aqrl %[v],%[u] \n" + "1: lr.w %[v],%[u] \n" " bne %[v],%z[ov],3f \n" "2: sc.w.aqrl %[t],%z[nv],%[u] \n" " bnez %[t],1b \n" @@ -93,7 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp) - : [ov] "Jr" (oldval), [nv] "Jr" (newval) + : [ov] "Jr" ((long)(int)oldval), [nv] "Jr" (newval) : "memory"); __disable_user_access(); diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index faf3624d8057..446126497768 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -28,7 +28,8 @@ void set_huge_pte_at(struct mm_struct *mm, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); + unsigned long addr, pte_t *ptep, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 1ce1df6d0ff3..dd624523981c 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _ASM_HWPROBE_H @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 10 +#define RISCV_HWPROBE_MAX_KEY 11 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { @@ -21,6 +21,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key) case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: case RISCV_HWPROBE_KEY_IMA_EXT_0: case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: return true; } diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 1c5c641075d2..0257f4aa7ff4 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -136,7 +136,7 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #include <asm-generic/io.h> #ifdef CONFIG_MMU -#define arch_memremap_wb(addr, size) \ +#define arch_memremap_wb(addr, size, flags) \ ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) #endif diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 35eab6e0f4ae..0e9c2fab6378 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -87,6 +87,11 @@ struct kvm_vcpu_stat { u64 csr_exit_kernel; u64 signal_exits; u64 exits; + u64 instr_illegal_exits; + u64 load_misaligned_exits; + u64 store_misaligned_exits; + u64 load_access_exits; + u64 store_access_exits; }; struct kvm_arch_memory_slot { @@ -296,8 +301,6 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } -static inline void kvm_arch_sync_events(struct kvm *kvm) {} - #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index b96705258cf9..4ed6203cdd30 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -85,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 71aabc5c6713..125f5ecd9565 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -122,6 +122,7 @@ struct kernel_mapping { extern struct kernel_mapping kernel_map; extern phys_addr_t phys_ram_base; +extern unsigned long vmemmap_start_pfn; #define is_kernel_mapping(x) \ ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index f52264304f77..3e2aebea6312 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -12,16 +12,25 @@ #include <asm/tlb.h> #ifdef CONFIG_MMU -#define __HAVE_ARCH_PUD_ALLOC_ONE #define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> +/* + * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to + * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use + * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this + * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the + * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * for more details. + */ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_sbi_for_rfence()) + if (riscv_use_sbi_for_rfence()) { tlb_remove_ptdesc(tlb, pt); - else + } else { + pagetable_dtor(pt); tlb_remove_page_ptdesc(tlb, pt); + } } static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -88,15 +97,6 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, } } -#define pud_alloc_one pud_alloc_one -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - if (pgtable_l4_enabled) - return __pud_alloc_one(mm, addr); - - return NULL; -} - #define pud_free pud_free static inline void pud_free(struct mm_struct *mm, pud_t *pud) { @@ -107,39 +107,8 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long addr) { - if (pgtable_l4_enabled) { - struct ptdesc *ptdesc = virt_to_ptdesc(pud); - - pagetable_pud_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); - } -} - -#define p4d_alloc_one p4d_alloc_one -static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - if (pgtable_l5_enabled) { - gfp_t gfp = GFP_PGTABLE_USER; - - if (mm == &init_mm) - gfp = GFP_PGTABLE_KERNEL; - return (p4d_t *)get_zeroed_page(gfp); - } - - return NULL; -} - -static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d) -{ - BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); - free_page((unsigned long)p4d); -} - -#define p4d_free p4d_free -static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) -{ - if (pgtable_l5_enabled) - __p4d_free(mm, p4d); + if (pgtable_l4_enabled) + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, @@ -161,9 +130,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + pgd = __pgd_alloc(mm, 0); if (likely(pgd != NULL)) { - memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ sync_kernel_mappings(pgd); } @@ -175,10 +143,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { - struct ptdesc *ptdesc = virt_to_ptdesc(pmd); - - pagetable_pmd_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -186,10 +151,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - struct ptdesc *ptdesc = page_ptdesc(pte); - - pagetable_pte_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index d4e99eef90ac..050fdc49b5ad 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -87,7 +87,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) +#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6c82318065cf..3d250824178b 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -159,6 +159,7 @@ struct riscv_pmu_snapshot_data { }; #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) +#define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 #define RISCV_PLAT_FW_EVENT 0xFFFF diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index e5121b89acea..52f11bfd0079 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -3,8 +3,11 @@ #ifndef __ASM_RISCV_SPINLOCK_H #define __ASM_RISCV_SPINLOCK_H -#ifdef CONFIG_RISCV_COMBO_SPINLOCKS +#ifdef CONFIG_QUEUED_SPINLOCKS #define _Q_PENDING_LOOPS (1 << 9) +#endif + +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS #define __no_arch_spinlock_redefine #include <asm/ticket_spinlock.h> diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 94e33216b2d9..0e71eb82f920 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -117,7 +117,7 @@ do { \ __set_prev_cpu(__prev->thread); \ if (has_fpu()) \ __switch_to_fpu(__prev, __next); \ - if (has_vector()) \ + if (has_vector() || has_xtheadvector()) \ __switch_to_vector(__prev, __next); \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 1f6c38420d8e..50b63b5c15bd 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -10,24 +10,6 @@ struct mmu_gather; static void tlb_flush(struct mmu_gather *tlb); -#ifdef CONFIG_MMU -#include <linux/swap.h> - -/* - * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to - * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use - * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this - * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the - * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h - * for more details. - */ -static inline void __tlb_remove_table(void *table) -{ - free_page_and_swap_cache(table); -} - -#endif /* CONFIG_MMU */ - #define tlb_flush tlb_flush #include <asm-generic/tlb.h> diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index f891478829a5..c130d8100232 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -14,7 +14,7 @@ */ #ifdef CONFIG_MMU -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> diff --git a/arch/riscv/include/asm/vdso/time_data.h b/arch/riscv/include/asm/vdso/arch_data.h index dfa65228999b..da57a3786f7a 100644 --- a/arch/riscv/include/asm/vdso/time_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __RISCV_ASM_VDSO_TIME_DATA_H -#define __RISCV_ASM_VDSO_TIME_DATA_H +#ifndef __RISCV_ASM_VDSO_ARCH_DATA_H +#define __RISCV_ASM_VDSO_ARCH_DATA_H #include <linux/types.h> #include <vdso/datapage.h> #include <asm/hwprobe.h> -struct arch_vdso_time_data { +struct vdso_arch_data { /* Stash static answers to the hwprobe queries when all CPUs are selected. */ __u64 all_cpu_hwprobe_values[RISCV_HWPROBE_MAX_KEY + 1]; @@ -14,4 +14,4 @@ struct arch_vdso_time_data { __u8 homogeneous_cpus; }; -#endif /* __RISCV_ASM_VDSO_TIME_DATA_H */ +#endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index ba3283cf7acc..29164f84f93c 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -69,7 +69,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #endif /* CONFIG_GENERIC_TIME_VSYSCALL */ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { /* * The purpose of csr_read(CSR_TIME) is to trap the system into @@ -79,18 +79,6 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return csr_read(CSR_TIME); } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/include/asm/vdso/vsyscall.h b/arch/riscv/include/asm/vdso/vsyscall.h index e8a9c4b53c0c..1140b54b4bc8 100644 --- a/arch/riscv/include/asm/vdso/vsyscall.h +++ b/arch/riscv/include/asm/vdso/vsyscall.h @@ -6,15 +6,6 @@ #include <vdso/datapage.h> -extern struct vdso_data *vdso_data; - -static __always_inline struct vdso_data *__riscv_get_k_vdso_data(void) -{ - return vdso_data; -} - -#define __arch_get_k_vdso_data __riscv_get_k_vdso_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c7c023afbacd..e8a83f55be2b 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -18,6 +18,27 @@ #include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#define __riscv_v_vstate_or(_val, TYPE) ({ \ + typeof(_val) _res = _val; \ + if (has_xtheadvector()) \ + _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \ + else \ + _res = (_res & ~SR_VS) | SR_VS_##TYPE; \ + _res; \ +}) + +#define __riscv_v_vstate_check(_val, TYPE) ({ \ + bool _res; \ + if (has_xtheadvector()) \ + _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \ + else \ + _res = ((_val) & SR_VS) == SR_VS_##TYPE; \ + _res; \ +}) extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); @@ -41,39 +62,62 @@ static __always_inline bool has_vector(void) return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X); } +static __always_inline bool has_xtheadvector_no_alternatives(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR); + else + return false; +} + +static __always_inline bool has_xtheadvector(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XTHEADVECTOR); + else + return false; +} + static inline void __riscv_v_vstate_clean(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; + regs->status = __riscv_v_vstate_or(regs->status, CLEAN); } static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; + regs->status = __riscv_v_vstate_or(regs->status, DIRTY); } static inline void riscv_v_vstate_off(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; + regs->status = __riscv_v_vstate_or(regs->status, OFF); } static inline void riscv_v_vstate_on(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL; + regs->status = __riscv_v_vstate_or(regs->status, INITIAL); } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { - return (regs->status & SR_VS) != 0; + return !__riscv_v_vstate_check(regs->status, OFF); } static __always_inline void riscv_v_enable(void) { - csr_set(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_set(CSR_SSTATUS, SR_VS_THEAD); + else + csr_set(CSR_SSTATUS, SR_VS); } static __always_inline void riscv_v_disable(void) { - csr_clear(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_clear(CSR_SSTATUS, SR_VS_THEAD); + else + csr_clear(CSR_SSTATUS, SR_VS); } static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) @@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" - "csrr %3, " __stringify(CSR_VCSR) "\n\t" - "csrr %4, " __stringify(CSR_VLENB) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr), "=r" (dest->vlenb) : :); + "=r" (dest->vcsr) : :); + + if (has_xtheadvector()) { + unsigned long status; + + /* + * CSR_VCSR is defined as + * [2:1] - vxrm[1:0] + * [0] - vxsat + * The earlier vector spec implemented by T-Head uses separate + * registers for the same bit-elements, so just combine those + * into the existing output field. + * + * Additionally T-Head cores need FS to be enabled when accessing + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. + * Though the cores do not implement the VXRM and VXSAT fields in the + * FCSR CSR that vector-0.7.1 specifies. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT; + + dest->vlenb = riscv_v_vsize / 32; + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + dest->vcsr = csr_read(CSR_VCSR); + dest->vlenb = csr_read(CSR_VLENB); + } } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) @@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" - "csrw " __stringify(CSR_VCSR) ", %3\n\t" - : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl)); + + if (has_xtheadvector()) { + unsigned long status = csr_read(CSR_SSTATUS); + + /* + * Similar to __vstate_csr_save above, restore values for the + * separate VXRM and VXSAT CSRs from the vcsr variable. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + + csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK); + csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK); + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + csr_write(CSR_VCSR, src->vcsr); + } } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, riscv_v_enable(); __vstate_csr_save(save_to); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } riscv_v_disable(); } @@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vle8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vle8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } __vstate_csr_restore(restore_from); riscv_v_disable(); } @@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void) unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); riscv_v_enable(); + if (has_xtheadvector()) + asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4"); + else + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + ".option pop\n\t": "=&r" (vl)); + asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" "vmv.v.i v0, -1\n\t" "vmv.v.i v8, -1\n\t" "vmv.v.i v16, -1\n\t" "vmv.v.i v24, -1\n\t" "vsetvl %0, x0, %1\n\t" ".option pop\n\t" - : "=&r" (vl) : "r" (vtype_inval) : "memory"); + : "=&r" (vl) : "r" (vtype_inval)); + riscv_v_disable(); } static inline void riscv_v_vstate_discard(struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_OFF) - return; - - __riscv_v_vstate_discard(); - __riscv_v_vstate_dirty(regs); + if (riscv_v_vstate_query(regs)) { + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); + } } static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(regs->status, DIRTY)) { __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_set_restore(struct task_struct *task, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); riscv_v_vstate_on(regs); } @@ -270,6 +392,8 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } +static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; } +static __always_inline bool has_xtheadvector(void) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h new file mode 100644 index 000000000000..e85c75b3b340 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H + +#include <asm/vendor_extensions.h> + +#include <linux/types.h> + +/* + * Extension keys must be strictly less than RISCV_ISA_VENDOR_EXT_MAX. + */ +#define RISCV_ISA_VENDOR_EXT_XTHEADVECTOR 0 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead; + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void disable_xtheadvector(void); +#else +static inline void disable_xtheadvector(void) { } +#endif + +/* Extension specific helpers */ + +/* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + +/* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. + */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h new file mode 100644 index 000000000000..65a9c5612466 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H + +#include <linux/cpumask.h> + +#include <uapi/asm/hwprobe.h> + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus); +#else +static inline void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + pair->value = 0; +} +#endif + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h new file mode 100644 index 000000000000..6b9293e984a9 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_RISCV_SYS_HWPROBE_H +#define _ASM_RISCV_SYS_HWPROBE_H + +#include <asm/cpufeature.h> + +#define VENDOR_EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_VENDOR_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + } while (false) + +/* + * Loop through and record extensions that 1) anyone has, and 2) anyone + * doesn't have. + * + * _extension_checks is an arbitrary C block to set the values of pair->value + * and missing. It should be filled with VENDOR_EXT_KEY expressions. + */ +#define VENDOR_EXTENSION_SUPPORTED(pair, cpus, per_hart_vendor_bitmap, _extension_checks) \ + do { \ + int cpu; \ + u64 missing = 0; \ + for_each_cpu(cpu, (cpus)) { \ + struct riscv_isavendorinfo *isainfo = &(per_hart_vendor_bitmap)[cpu]; \ + _extension_checks \ + } \ + (pair)->value &= ~missing; \ + } while (false) \ + +#endif /* _ASM_RISCV_SYS_HWPROBE_H */ diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h index 2f2bb0c84f9a..a5150cdf34d8 100644 --- a/arch/riscv/include/asm/vendorid_list.h +++ b/arch/riscv/include/asm/vendorid_list.h @@ -6,6 +6,7 @@ #define ASM_VENDOR_LIST_H #define ANDES_VENDOR_ID 0x31e +#define MICROCHIP_VENDOR_ID 0x029 #define SIFIVE_VENDOR_ID 0x489 #define THEAD_VENDOR_ID 0x5b7 diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3af142b99f77..c3c1cc951cb9 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _UAPI_ASM_HWPROBE_H @@ -94,6 +94,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2 #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 +#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 3482c9a73d1b..f06bc5efcd79 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -179,6 +179,9 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SSNPM, KVM_RISCV_ISA_EXT_SVADE, KVM_RISCV_ISA_EXT_SVADU, + KVM_RISCV_ISA_EXT_SVVPTC, + KVM_RISCV_ISA_EXT_ZABHA, + KVM_RISCV_ISA_EXT_ZICCRSE, KVM_RISCV_ISA_EXT_MAX, }; @@ -198,6 +201,7 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_VENDOR, KVM_RISCV_SBI_EXT_DBCN, KVM_RISCV_SBI_EXT_STA, + KVM_RISCV_SBI_EXT_SUSP, KVM_RISCV_SBI_EXT_MAX, }; @@ -211,9 +215,6 @@ struct kvm_riscv_sbi_sta { #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - /* If you need to interpret the index values, here is the key: */ #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_SHIFT 24 diff --git a/arch/riscv/include/uapi/asm/vendor/thead.h b/arch/riscv/include/uapi/asm/vendor/thead.h new file mode 100644 index 000000000000..43790ebe5faf --- /dev/null +++ b/arch/riscv/include/uapi/asm/vendor/thead.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#define RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR (1 << 0) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 063d1faf5a53..8d186bfced45 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -123,3 +123,5 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 000000000000..3655fe7d678c --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sprintf.h> + +#include <asm/bugs.h> +#include <asm/vendor_extensions/thead.h> + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitgated, false otherwise. + */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 2d40736fc37c..26b085dbdd07 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -108,11 +108,11 @@ int populate_cache_leaves(unsigned int cpu) if (!np) return -ENOENT; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; @@ -125,11 +125,11 @@ int populate_cache_leaves(unsigned int cpu) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c0916ed318c2..40ac72e407b6 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -17,6 +17,7 @@ #include <linux/of.h> #include <asm/acpi.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> @@ -26,6 +27,7 @@ #include <asm/sbi.h> #include <asm/vector.h> #include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -39,6 +41,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; +u32 thead_vlenb_of; + /** * riscv_isa_extension_base() - Get base extension word * @@ -475,7 +479,7 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, if (bit < RISCV_ISA_EXT_BASE) *this_hwcap |= isa2hwcap[bit]; } - } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); + } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX)); } static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap) @@ -791,9 +795,50 @@ static void __init riscv_fill_vendor_ext_list(int cpu) } } +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; @@ -844,6 +889,17 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) riscv_fill_vendor_ext_list(cpu); } + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary. + */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); + } + if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) return -ENOENT; @@ -896,7 +952,8 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) { + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { /* * This cannot fail when called on the boot hart */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index c200d329d4bd..33a5a9f2a0d4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -23,21 +23,21 @@ REG_S a0, TASK_TI_A0(tp) csrr a0, CSR_CAUSE /* Exclude IRQs */ - blt a0, zero, _new_vmalloc_restore_context_a0 + blt a0, zero, .Lnew_vmalloc_restore_context_a0 REG_S a1, TASK_TI_A1(tp) /* Only check new_vmalloc if we are in page/protection fault */ li a1, EXC_LOAD_PAGE_FAULT - beq a0, a1, _new_vmalloc_kernel_address + beq a0, a1, .Lnew_vmalloc_kernel_address li a1, EXC_STORE_PAGE_FAULT - beq a0, a1, _new_vmalloc_kernel_address + beq a0, a1, .Lnew_vmalloc_kernel_address li a1, EXC_INST_PAGE_FAULT - bne a0, a1, _new_vmalloc_restore_context_a1 + bne a0, a1, .Lnew_vmalloc_restore_context_a1 -_new_vmalloc_kernel_address: +.Lnew_vmalloc_kernel_address: /* Is it a kernel address? */ csrr a0, CSR_TVAL - bge a0, zero, _new_vmalloc_restore_context_a1 + bge a0, zero, .Lnew_vmalloc_restore_context_a1 /* Check if a new vmalloc mapping appeared that could explain the trap */ REG_S a2, TASK_TI_A2(tp) @@ -69,7 +69,7 @@ _new_vmalloc_kernel_address: /* Check the value of new_vmalloc for this cpu */ REG_L a2, 0(a0) and a2, a2, a1 - beq a2, zero, _new_vmalloc_restore_context + beq a2, zero, .Lnew_vmalloc_restore_context /* Atomically reset the current cpu bit in new_vmalloc */ amoxor.d a0, a1, (a0) @@ -83,11 +83,11 @@ _new_vmalloc_kernel_address: csrw CSR_SCRATCH, x0 sret -_new_vmalloc_restore_context: +.Lnew_vmalloc_restore_context: REG_L a2, TASK_TI_A2(tp) -_new_vmalloc_restore_context_a1: +.Lnew_vmalloc_restore_context_a1: REG_L a1, TASK_TI_A1(tp) -_new_vmalloc_restore_context_a0: +.Lnew_vmalloc_restore_context_a0: REG_L a0, TASK_TI_A0(tp) .endm @@ -278,6 +278,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else sret #endif +SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL) SYM_CODE_END(ret_from_exception) ASM_NOKPROBE(ret_from_exception) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 8cb9b211611d..3524db5e4fa0 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -214,7 +214,22 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - prepare_ftrace_return(&arch_ftrace_regs(fregs)->ra, ip, arch_ftrace_regs(fregs)->s0); + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long frame_pointer = arch_ftrace_regs(fregs)->s0; + unsigned long *parent = &arch_ftrace_regs(fregs)->ra; + unsigned long old; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * We don't suffer access faults, so no extra fault-recovery assembly + * is needed here. + */ + old = *parent; + + if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) + *parent = return_hooker; } #else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ extern void ftrace_graph_call(void); diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c index 6afe80c7f03a..99972a48e86b 100644 --- a/arch/riscv/kernel/kernel_mode_vector.c +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested) /* Transfer the ownership of V from user to kernel, then save */ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); - if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { uvstate = ¤t->thread.vstate; __riscv_v_vstate_save(uvstate, uvstate->datap); } @@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) return; depth = riscv_v_ctx_get_depth(); - if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) riscv_preempt_v_set_dirty(); riscv_v_ctx_depth_inc(); @@ -208,7 +208,7 @@ void kernel_vector_begin(void) { bool nested = false; - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; BUG_ON(!may_use_simd()); @@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin); */ void kernel_vector_end(void) { - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; riscv_v_disable(); diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index 3c830a6f7ef4..2306ce3e5f22 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -114,29 +114,6 @@ void machine_shutdown(void) #endif } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 3a42f6287909..068168046e0e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -12,6 +12,8 @@ #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#define ABI_SIZE_ON_STACK 80 + .text .macro SAVE_ABI_STATE @@ -26,12 +28,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -4*SZREG - REG_S s0, 2*SZREG(sp) - REG_S ra, 3*SZREG(sp) - REG_S a0, 1*SZREG(sp) - REG_S a1, 0*SZREG(sp) - addi s0, sp, 4*SZREG + addi sp, sp, -ABI_SIZE_ON_STACK + REG_S ra, 1*SZREG(sp) + REG_S s0, 8*SZREG(sp) + REG_S a0, 10*SZREG(sp) + REG_S a1, 11*SZREG(sp) + addi s0, sp, ABI_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -41,11 +43,11 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 3*SZREG(sp) - REG_L s0, 2*SZREG(sp) - REG_L a0, 1*SZREG(sp) - REG_L a1, 0*SZREG(sp) - addi sp, sp, 4*SZREG + REG_L ra, 1*SZREG(sp) + REG_L s0, 8*SZREG(sp) + REG_L a0, 10*SZREG(sp) + REG_L a1, 11*SZREG(sp) + addi sp, sp, ABI_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1cd461f3d872..47d0ebeec93c 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -23,7 +23,7 @@ struct used_bucket { struct relocation_head { struct hlist_node node; - struct list_head *rel_entry; + struct list_head rel_entry; void *location; }; @@ -634,7 +634,7 @@ process_accumulated_relocations(struct module *me, location = rel_head_iter->location; list_for_each_entry_safe(rel_entry_iter, rel_entry_iter_tmp, - rel_head_iter->rel_entry, + &rel_head_iter->rel_entry, head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( @@ -704,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, return -ENOMEM; } - rel_head->rel_entry = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - - if (!rel_head->rel_entry) { - kfree(entry); - kfree(rel_head); - return -ENOMEM; - } - - INIT_LIST_HEAD(rel_head->rel_entry); + INIT_LIST_HEAD(&rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { @@ -722,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head->rel_entry); kfree(rel_head); return -ENOMEM; } @@ -735,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, } /* Add relocation to head of discovered rel_head */ - list_add_tail(&entry->head, rel_head->rel_entry); + list_add_tail(&entry->head, &rel_head->rel_entry); return 0; } diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 380a0e8cecc0..c0738d6c6498 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -30,7 +30,7 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) p->ainsn.api.restore = (unsigned long)p->addr + len; patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); - patch_text_nosync(p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 58b6482c2bf6..7c244de77180 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -190,7 +190,7 @@ void flush_thread(void) void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_free(tsk); } @@ -240,7 +240,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = 0; } p->thread.riscv_v_flags = 0; - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ @@ -364,7 +364,7 @@ static bool try_to_set_pmm(unsigned long value) * disable it for tasks that already opted in to the relaxed ABI. */ -static struct ctl_table tagged_addr_sysctl_table[] = { +static const struct ctl_table tagged_addr_sysctl_table[] = { { .procname = "tagged_addr_disabled", .mode = 0644, diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 45010e71df86..4fe45daa6281 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -147,9 +147,7 @@ static void __init init_resources(void) res_idx = num_resources - 1; mem_res_sz = num_resources * sizeof(*mem_res); - mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); - if (!mem_res) - panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); + mem_res = memblock_alloc_or_panic(mem_res_sz, SMP_CACHE_BYTES); /* * Start by adding the reserved regions, if they overlap @@ -324,8 +322,8 @@ void __init setup_arch(char **cmdline_p) riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); - init_rt_signal_env(); apply_boot_alternatives(); + init_rt_signal_env(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index dcd282419456..08378fea3a11 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -189,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -211,16 +211,10 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } - /* - * Preserved a __riscv_ctx_hdr for END signal context header if an - * extension uses __riscv_extra_ext_header - */ - if (total_context_size) - total_context_size += sizeof(struct __riscv_ctx_hdr); frame_size += total_context_size; @@ -284,7 +278,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index c180a647a30e..d58b5e751286 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -43,6 +43,7 @@ enum ipi_message_type { unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; +EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map); void __init smp_setup_processor_id(void) { diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 153a2db4c5fa..d4355c770c36 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -17,6 +17,7 @@ #ifdef CONFIG_FRAME_POINTER extern asmlinkage void handle_exception(void); +extern unsigned long ret_from_exception_end; static inline int fp_is_valid(unsigned long fp, unsigned long sp) { @@ -71,7 +72,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)handle_exception) { + if (pc >= (unsigned long)handle_exception && + pc < (unsigned long)&ret_from_exception_end) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cb93adfffc48..04a4e5495512 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -15,6 +15,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/vector.h> +#include <asm/vendor_extensions/thead_hwprobe.h> #include <vdso/vsyscall.h> @@ -286,6 +287,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = riscv_timebase; break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 @@ -445,8 +450,7 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, static int __init init_hwprobe_vdso_data(void) { - struct vdso_data *vd = __arch_get_k_vdso_data(); - struct arch_vdso_time_data *avd = &vd->arch_data; + struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 51ebfd23e007..8ff8e8b36524 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -35,7 +35,7 @@ int show_unhandled_signals = 1; -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) { @@ -81,7 +81,7 @@ void die(struct pt_regs *regs, const char *str) oops_enter(); - spin_lock_irqsave(&die_lock, flags); + raw_spin_lock_irqsave(&die_lock, flags); console_verbose(); bust_spinlocks(1); @@ -100,7 +100,7 @@ void die(struct pt_regs *regs, const char *str) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&die_lock, flags); + raw_spin_unlock_irqrestore(&die_lock, flags); oops_exit(); if (in_interrupt()) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 3ca3ae4277e1..cc2895d1fbc2 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -13,20 +13,11 @@ #include <linux/err.h> #include <asm/page.h> #include <asm/vdso.h> -#include <linux/time_namespace.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <vdso/vsyscall.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - -#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) - -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; +#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) struct __vdso_info { const char *name; @@ -79,78 +70,6 @@ static void __init __vdso_init(struct __vdso_info *vdso_info) vdso_info->cm->pages = vdso_pagelist; } -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -static const struct vm_special_mapping rv_vvar_map; - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &rv_vvar_map)) - zap_vma_pages(vma); - } - - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = sym_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = sym_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - -static const struct vm_special_mapping rv_vvar_map = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping rv_vdso_map __ro_after_init = { .name = "[vdso]", .mremap = vdso_mremap, @@ -196,7 +115,7 @@ static int __setup_additional_pages(struct mm_struct *mm, unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -208,8 +127,7 @@ static int __setup_additional_pages(struct mm_struct *mm, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), &rv_vvar_map); + ret = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(ret)) goto up_fail; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9a1b555e8733..ad73607abc28 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -2,7 +2,7 @@ # Copied from arch/tile/kernel/vdso/Makefile # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Symbols present in the vdso vdso-syms = rt_sigreturn ifdef CONFIG_64BIT diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index a158c029344f..2ddeba6c68dd 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -16,8 +16,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_time_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; bool all_cpus = !cpusetsize && !cpus; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; @@ -51,8 +50,7 @@ static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_time_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; unsigned char *c = (unsigned char *)cpus; diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index cbe2a179331d..8e86965a8aae 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -4,15 +4,14 @@ */ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_ARCH(riscv) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 821818886fab..184f780c932d 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. + */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void) void __init riscv_v_setup_ctx_cache(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", @@ -173,7 +183,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return false; /* Do not handle if V is not supported, or disabled */ @@ -216,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -238,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -249,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -287,7 +297,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) #ifdef CONFIG_SYSCTL -static struct ctl_table riscv_v_default_vstate_table[] = { +static const struct ctl_table riscv_v_default_vstate_table[] = { { .procname = "riscv_v_default_allow", .data = &riscv_v_implicit_uacc, @@ -299,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -309,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a8126d118341..a31ff84740eb 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -6,6 +6,7 @@ #include <asm/vendorid_list.h> #include <asm/vendor_extensions.h> #include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/thead.h> #include <linux/array_size.h> #include <linux/types.h> @@ -14,6 +15,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES &riscv_isa_vendor_ext_list_andes, #endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif }; const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); @@ -41,6 +45,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif default: return false; } diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index 6a61aed944f1..866414c81a9f 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 000000000000..519dbf70710a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/cpumask.h> +#include <linux/types.h> + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 000000000000..2eba34011786 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/thead.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/thead.h> + +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 0fb1840c3e0a..4e0bba91d284 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -30,6 +30,7 @@ kvm-y += vcpu_sbi_hsm.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_sta.o +kvm-y += vcpu_sbi_system.o kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_switch.o kvm-y += vcpu_timer.o diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index a8085cd8215e..29ef9c2133a9 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, if (imsic->vsfile_cpu >= 0) { writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE); - kvm_vcpu_kick(vcpu); } else { eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)]; set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 1fa8be5ee509..4b24705dc63a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -172,8 +172,8 @@ module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { - kvm_riscv_teardown(); - kvm_exit(); + + kvm_riscv_teardown(); } module_exit(riscv_kvm_exit); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e048dcc6e65e..60d684c76c58 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -34,7 +34,12 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, csr_exit_user), STATS_DESC_COUNTER(VCPU, csr_exit_kernel), STATS_DESC_COUNTER(VCPU, signal_exits), - STATS_DESC_COUNTER(VCPU, exits) + STATS_DESC_COUNTER(VCPU, exits), + STATS_DESC_COUNTER(VCPU, instr_illegal_exits), + STATS_DESC_COUNTER(VCPU, load_misaligned_exits), + STATS_DESC_COUNTER(VCPU, store_misaligned_exits), + STATS_DESC_COUNTER(VCPU, load_access_exits), + STATS_DESC_COUNTER(VCPU, store_access_exits), }; const struct kvm_stats_header kvm_vcpu_stats_header = { diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index fa98e5c024b2..6e0c18412795 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -165,6 +165,17 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, vcpu->arch.guest_context.sstatus |= SR_SPP; } +static inline int vcpu_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap) +{ + int ret = -EFAULT; + + if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) { + kvm_riscv_vcpu_trap_redirect(vcpu, trap); + ret = 1; + } + return ret; +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -183,14 +194,32 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, run->exit_reason = KVM_EXIT_UNKNOWN; switch (trap->scause) { case EXC_INST_ILLEGAL: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ILLEGAL_INSN); + vcpu->stat.instr_illegal_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_LOAD_MISALIGNED: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_LOAD); + vcpu->stat.load_misaligned_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_STORE_MISALIGNED: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_STORE); + vcpu->stat.store_misaligned_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_LOAD_ACCESS: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_LOAD); + vcpu->stat.load_access_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_STORE_ACCESS: - if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) { - kvm_riscv_vcpu_trap_redirect(vcpu, trap); - ret = 1; - } + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_STORE); + vcpu->stat.store_access_exits++; + ret = vcpu_redirect(vcpu, trap); + break; + case EXC_INST_ACCESS: + ret = vcpu_redirect(vcpu, trap); break; case EXC_VIRTUAL_INST_FAULT: if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 753f66c8b70a..43ee8e33ba23 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -46,6 +46,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), @@ -65,6 +67,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), @@ -145,6 +148,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: @@ -162,6 +167,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZFA: case KVM_RISCV_ISA_EXT_ZFH: case KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: case KVM_RISCV_ISA_EXT_ZICSR: @@ -197,7 +203,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SVADE: /* * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. - * Svade is not allowed to disable when the platform use Svade. + * Svade can't be disabled unless we support Svadu. */ return arch_has_hw_pte_young(); default: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 2707a51b082c..78ac3216a54d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -666,6 +666,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba .type = etype, .size = sizeof(struct perf_event_attr), .pinned = true, + .disabled = true, /* * It should never reach here if the platform doesn't support the sscofpmf * extension as mode filtering won't work without it. diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 6e704ed86a83..d1c83a77735e 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_dbcn, }, { + .ext_idx = KVM_RISCV_SBI_EXT_SUSP, + .ext_ptr = &vcpu_sbi_ext_susp, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_STA, .ext_ptr = &vcpu_sbi_ext_sta, }, diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index dce667f4b6ab..3070bb31745d 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/sbi.h> #include <asm/kvm_vcpu_sbi.h> @@ -79,12 +80,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!kvm_riscv_vcpu_stopped(target_vcpu)) - return SBI_HSM_STATE_STARTED; - else if (vcpu->stat.generic.blocking) + if (kvm_riscv_vcpu_stopped(target_vcpu)) + return SBI_HSM_STATE_STOPPED; + else if (target_vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; else - return SBI_HSM_STATE_STOPPED; + return SBI_HSM_STATE_STARTED; } static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, @@ -109,7 +110,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, } return 0; case SBI_EXT_HSM_HART_SUSPEND: - switch (cp->a0) { + switch (lower_32_bits(cp->a0)) { case SBI_HSM_SUSPEND_RET_DEFAULT: kvm_riscv_vcpu_wfi(vcpu); break; diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9c2ab3dfa93a..5fbf3f94f1e8 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, u64 next_cycle; if (cp->a6 != SBI_EXT_TIME_SET_TIMER) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; + unsigned long hart_bit = 0, sentmask = 0; if (cp->a6 != SBI_EXT_IPI_SEND_IPI) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, if (hbase != -1UL) { if (tmp->vcpu_id < hbase) continue; - if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + hart_bit = tmp->vcpu_id - hbase; + if (hart_bit >= __riscv_xlen) + goto done; + if (!(hmask & (1UL << hart_bit))) continue; } ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); if (ret < 0) break; + sentmask |= 1UL << hart_bit; kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD); } +done: + if (hbase != -1UL && (hmask ^ sentmask)) + retdata->err_val = SBI_ERR_INVALID_PARAM; + return ret; } diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c new file mode 100644 index 000000000000..bc0ebba89003 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_system.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. + */ + +#include <linux/kvm_host.h> +#include <linux/wordpart.h> + +#include <asm/kvm_vcpu_sbi.h> +#include <asm/sbi.h> + +static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + struct kvm_cpu_context *reset_cntx; + unsigned long funcid = cp->a6; + unsigned long hva, i; + struct kvm_vcpu *tmp; + + switch (funcid) { + case SBI_EXT_SUSP_SYSTEM_SUSPEND: + if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { + retdata->err_val = SBI_ERR_INVALID_PARAM; + return 0; + } + + if (!(cp->sstatus & SR_SPP)) { + retdata->err_val = SBI_ERR_FAILURE; + return 0; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, cp->a1 >> PAGE_SHIFT, NULL); + if (kvm_is_error_hva(hva)) { + retdata->err_val = SBI_ERR_INVALID_ADDRESS; + return 0; + } + + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + if (tmp == vcpu) + continue; + if (!kvm_riscv_vcpu_stopped(tmp)) { + retdata->err_val = SBI_ERR_DENIED; + return 0; + } + } + + spin_lock(&vcpu->arch.reset_cntx_lock); + reset_cntx = &vcpu->arch.guest_reset_context; + reset_cntx->sepc = cp->a1; + reset_cntx->a0 = vcpu->vcpu_id; + reset_cntx->a1 = cp->a2; + spin_unlock(&vcpu->arch.reset_cntx_lock); + + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); + + /* userspace provides the suspend implementation */ + kvm_riscv_vcpu_sbi_forward(vcpu, run); + retdata->uexit = true; + break; + default: + retdata->err_val = SBI_ERR_NOT_SUPPORTED; + break; + } + + return 0; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp = { + .extid_start = SBI_EXT_SUSP, + .extid_end = SBI_EXT_SUSP, + .default_disabled = true, + .handler = kvm_sbi_ext_susp_handler, +}; diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 96e7a4e463f7..ff672fa71fcc 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -248,18 +248,19 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) if (t->init_done) return -EINVAL; - hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); t->init_done = true; t->next_set = false; /* Enable sstc for every vcpu if available in hardware */ if (riscv_isa_extension_available(NULL, SSTC)) { t->sstc_enabled = true; - t->hrt.function = kvm_riscv_vcpu_vstimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp; } else { t->sstc_enabled = false; - t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_hrtimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_hrtimer; } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 8eec6b69a875..b1c46153606a 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -15,8 +15,12 @@ endif lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o -lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o - +obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o +crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o +obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o +crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o +crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h new file mode 100644 index 000000000000..8d73449235ef --- /dev/null +++ b/arch/riscv/lib/crc-clmul-consts.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CRC constants generated by: + * + * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 + * + * Do not edit manually. + */ + +struct crc_clmul_consts { + unsigned long fold_across_2_longs_const_hi; + unsigned long fold_across_2_longs_const_lo; + unsigned long barrett_reduction_const_1; + unsigned long barrett_reduction_const_2; +}; + +/* + * Constants generated for most-significant-bit-first CRC-16 using + * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */ + .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */ + .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */ +#else + .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */ + .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */ + .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */ + .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */ +#endif +}; + +/* + * Constants generated for most-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */ + .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */ +#else + .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */ + .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */ + .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */ +#endif +}; + +/* + * Constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */ + .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */ +#else + .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */ + .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */ + .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */ +#endif +}; + +/* + * Constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 + + * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 + */ +static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */ + .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */ +#else + .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */ + .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */ + .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */ +#endif +}; + +/* + * Constants generated for most-significant-bit-first CRC-64 using + * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x^1 + x^0 + */ +#ifdef CONFIG_64BIT +static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = { + .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */ + .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */ + .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */ +}; +#endif + +/* + * Constants generated for least-significant-bit-first CRC-64 using + * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + + * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + + * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + + * x^4 + x^3 + x^0 + */ +#ifdef CONFIG_64BIT +static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = { + .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */ + .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */ + .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */ +}; +#endif diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h new file mode 100644 index 000000000000..77187e7f1762 --- /dev/null +++ b/arch/riscv/lib/crc-clmul-template.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2025 Google LLC */ + +/* + * This file is a "template" that generates a CRC function optimized using the + * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this + * file must define the following parameters to specify the type of CRC: + * + * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC + * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural + * mapping between bits and polynomial coefficients + * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected + * mapping between bits and polynomial coefficients + */ + +#include <asm/byteorder.h> +#include <linux/minmax.h> + +#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */ + +static inline unsigned long clmul(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmul %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +static inline unsigned long clmulh(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmulh %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +static inline unsigned long clmulr(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmulr %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* + * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a + * polynomial whose bit order matches the CRC's bit order. + */ +#ifdef CONFIG_64BIT +# if LSB_CRC +# define crc_load_long(x) le64_to_cpup(x) +# else +# define crc_load_long(x) be64_to_cpup(x) +# endif +#else +# if LSB_CRC +# define crc_load_long(x) le32_to_cpup(x) +# else +# define crc_load_long(x) be32_to_cpup(x) +# endif +#endif + +/* XOR @crc into the end of @msgpoly that represents the high-order terms. */ +static inline unsigned long +crc_clmul_prep(crc_t crc, unsigned long msgpoly) +{ +#if LSB_CRC + return msgpoly ^ crc; +#else + return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS)); +#endif +} + +/* + * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it + * modulo the generator polynomial G. This gives the CRC of @msgpoly. + */ +static inline crc_t +crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts) +{ + unsigned long tmp; + + /* + * First step of Barrett reduction with integrated multiplication by + * x^n: calculate floor((msgpoly * x^n) / G). This is the value by + * which G needs to be multiplied to cancel out the x^n and higher terms + * of msgpoly * x^n. Do it using the following formula: + * + * msb-first: + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1)) + * lsb-first: + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG) + * + * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G), + * which fits a long exactly. Using any lower power of x there would + * not carry enough precision through the calculation, while using any + * higher power of x would require extra instructions to handle a wider + * multiplication. In the msb-first case, using this power of x results + * in needing a floored division by x^(BITS_PER_LONG-1), which matches + * what clmulr produces. In the lsb-first case, a factor of x gets + * implicitly introduced by each carryless multiplication (shown as + * '* x' above), and the floored division instead needs to be by + * x^BITS_PER_LONG which matches what clmul produces. + */ +#if LSB_CRC + tmp = clmul(msgpoly, consts->barrett_reduction_const_1); +#else + tmp = clmulr(msgpoly, consts->barrett_reduction_const_1); +#endif + + /* + * Second step of Barrett reduction: + * + * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G)) + * + * This reduces (msgpoly * x^n) modulo G by adding the appropriate + * multiple of G to it. The result uses only the x^0..x^(n-1) terms. + * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those + * terms in the first place, it is more efficient to do the equivalent: + * + * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n + * + * In the lsb-first case further modify it to the following which avoids + * a shift, as the crc ends up in the physically low n bits from clmulr: + * + * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x + * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n + * + * barrett_reduction_const_2 contains the constant multiplier (G - x^n) + * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The + * cast of the result to crc_t is essential, as it applies the mod x^n! + */ +#if LSB_CRC + return clmulr(tmp, consts->barrett_reduction_const_2); +#else + return clmul(tmp, consts->barrett_reduction_const_2); +#endif +} + +/* Update @crc with the data from @msgpoly. */ +static inline crc_t +crc_clmul_update_long(crc_t crc, unsigned long msgpoly, + const struct crc_clmul_consts *consts) +{ + return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts); +} + +/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */ +static inline crc_t +crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len, + const struct crc_clmul_consts *consts) +{ + unsigned long msgpoly; + size_t i; + +#if LSB_CRC + msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8); + for (i = 1; i < len; i++) + msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8)); +#else + msgpoly = p[0]; + for (i = 1; i < len; i++) + msgpoly = (msgpoly << 8) ^ p[i]; +#endif + + if (len >= sizeof(crc_t)) { + #if LSB_CRC + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); + #else + msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS); + #endif + return crc_clmul_long(msgpoly, consts); + } +#if LSB_CRC + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); + return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len)); +#else + msgpoly ^= crc >> (CRC_BITS - 8*len); + return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len)); +#endif +} + +static inline crc_t +crc_clmul(crc_t crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + size_t align; + + /* This implementation assumes that the CRC fits in an unsigned long. */ + BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long)); + + /* If the buffer is not long-aligned, align it. */ + align = (unsigned long)p % sizeof(unsigned long); + if (align && len) { + align = min(sizeof(unsigned long) - align, len); + crc = crc_clmul_update_partial(crc, p, align, consts); + p += align; + len -= align; + } + + if (len >= 4 * sizeof(unsigned long)) { + unsigned long m0, m1; + + m0 = crc_clmul_prep(crc, crc_load_long(p)); + m1 = crc_load_long(p + sizeof(unsigned long)); + p += 2 * sizeof(unsigned long); + len -= 2 * sizeof(unsigned long); + /* + * Main loop. Each iteration starts with a message polynomial + * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two + * more longs of data to form x^(3*BITS_PER_LONG)*m0 + + * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then + * "folds" that back into a congruent (modulo G) value that uses + * just m0 and m1 again. This is done by multiplying m0 by the + * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by + * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then + * adding the results to m2 and m3 as appropriate. Each such + * multiplication produces a result twice the length of a long, + * which in RISC-V is two instructions clmul and clmulh. + * + * This could be changed to fold across more than 2 longs at a + * time if there is a CPU that can take advantage of it. + */ + do { + unsigned long p0, p1, p2, p3; + + p0 = clmulh(m0, consts->fold_across_2_longs_const_hi); + p1 = clmul(m0, consts->fold_across_2_longs_const_hi); + p2 = clmulh(m1, consts->fold_across_2_longs_const_lo); + p3 = clmul(m1, consts->fold_across_2_longs_const_lo); + m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p); + m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^ + crc_load_long(p + sizeof(unsigned long)); + + p += 2 * sizeof(unsigned long); + len -= 2 * sizeof(unsigned long); + } while (len >= 2 * sizeof(unsigned long)); + + crc = crc_clmul_long(m0, consts); + crc = crc_clmul_update_long(crc, m1, consts); + } + + while (len >= sizeof(unsigned long)) { + crc = crc_clmul_update_long(crc, crc_load_long(p), consts); + p += sizeof(unsigned long); + len -= sizeof(unsigned long); + } + + if (len) + crc = crc_clmul_update_partial(crc, p, len, consts); + + return crc; +} diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h new file mode 100644 index 000000000000..dd1736245815 --- /dev/null +++ b/arch/riscv/lib/crc-clmul.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2025 Google LLC */ + +#ifndef _RISCV_CRC_CLMUL_H +#define _RISCV_CRC_CLMUL_H + +#include <linux/types.h> +#include "crc-clmul-consts.h" + +u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +#ifdef CONFIG_64BIT +u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +#endif + +#endif /* _RISCV_CRC_CLMUL_H */ diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c new file mode 100644 index 000000000000..e6b0051ccd86 --- /dev/null +++ b/arch/riscv/lib/crc-t10dif.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC-T10DIF function + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc-t10dif.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts); + return crc_t10dif_generic(crc, p, len); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c new file mode 100644 index 000000000000..554d295e95f5 --- /dev/null +++ b/arch/riscv/lib/crc16_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC16 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u16 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c index d7dc599af3ef..a3188b7d9c40 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32.c @@ -1,294 +1,53 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: GPL-2.0-or-later /* - * Accelerated CRC32 implementation with Zbc extension. + * RISC-V optimized CRC32 functions * - * Copyright (C) 2024 Intel Corporation + * Copyright 2025 Google LLC */ #include <asm/hwcap.h> #include <asm/alternative-macros.h> -#include <asm/byteorder.h> - -#include <linux/types.h> -#include <linux/minmax.h> -#include <linux/crc32poly.h> #include <linux/crc32.h> -#include <linux/byteorder/generic.h> - -/* - * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for - * better understanding of how this math works. - * - * let "+" denotes polynomial add (XOR) - * let "-" denotes polynomial sub (XOR) - * let "*" denotes polynomial multiplication - * let "/" denotes polynomial floor division - * let "S" denotes source data, XLEN bit wide - * let "P" denotes CRC32 polynomial - * let "T" denotes 2^(XLEN+32) - * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit - * - * crc32(S, P) - * => S * (2^32) - S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) * (T / P) / T * P - * => lowest 32 bits of: S * (2^32) * quotient / T * P - * => lowest 32 bits of: S * quotient / 2^XLEN * P - * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P - * => clmul_low_part(clmul_high_part(S, QT) + S, P) - * - * In terms of below implementations, the BE case is more intuitive, since the - * higher order bit sits at more significant position. - */ - -#if __riscv_xlen == 64 -/* Slide by XLEN bits per iteration */ -# define STEP_ORDER 3 - -/* Each below polynomial quotient has an implicit bit for 2^XLEN */ +#include <linux/module.h> -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */ -# define CRC32_POLY_QT_LE 0x5a72d812fb808b20 +#include "crc-clmul.h" -/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */ -# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be - * the same as the bit-reversed version of CRC32_POLY_QT_LE - */ -# define CRC32_POLY_QT_BE 0x04d101df481b4e5a - -static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr) +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) { - return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr); + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_lsb_clmul(crc, p, len, + &crc32_lsb_0xedb88320_consts); + return crc32_le_base(crc, p, len); } +EXPORT_SYMBOL(crc32_le_arch); -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) { - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - "srli %0, %0, 32\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" ((u64)poly << 32) - :); - return crc; -} - -static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr); -} - -#elif __riscv_xlen == 32 -# define STEP_ORDER 2 -/* Each quotient should match the upper half of its analog in RV64 */ -# define CRC32_POLY_QT_LE 0xfb808b20 -# define CRC32C_POLY_QT_LE 0x6f5389f8 -# define CRC32_POLY_QT_BE 0x04d101df - -static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_le32(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" (poly) - :); - return crc; -} - -static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_be32(*ptr); -} - -#else -# error "Unexpected __riscv_xlen" -#endif - -static inline u32 crc32_be_zbc(unsigned long s) -{ - u32 crc; - - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmulh %0, %1, %2\n" - "xor %0, %0, %1\n" - "clmul %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (CRC32_POLY_QT_BE), - "r" (CRC32_POLY_BE) - :); - return crc; -} - -#define STEP (1 << STEP_ORDER) -#define OFFSET_MASK (STEP - 1) - -typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len); - -static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - for (int i = 0; i < len; i++) - s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8); - - s ^= (unsigned long)crc << (__riscv_xlen - bits); - if (__riscv_xlen == 32 || len < sizeof(u32)) - crc_low = crc >> bits; - - crc = crc32_le_zbc(s, poly, poly_qt); - crc ^= crc_low; - - return crc; -} - -static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt, - fallback crc_fb) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_le_prep(crc, p_ul); - crc = crc32_le_zbc(s, poly, poly_qt); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt); - - return crc; - -legacy: - return crc_fb(crc, p, len); -} - -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, - crc32_le_base); + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_msb_clmul(crc, p, len, + &crc32_msb_0x04c11db7_consts); + return crc32_be_base(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { - return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, __crc32c_le_base); + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_lsb_clmul(crc, p, len, + &crc32_lsb_0x82f63b78_consts); + return crc32c_base(crc, p, len); } +EXPORT_SYMBOL(crc32c_arch); -static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, - size_t len) +u32 crc32_optimizations(void) { - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - s = 0; - for (int i = 0; i < len; i++) - s = *p++ | (s << 8); - - if (__riscv_xlen == 32 || len < sizeof(u32)) { - s ^= crc >> (32 - bits); - crc_low = crc << bits; - } else { - s ^= (unsigned long)crc << (bits - 32); - } - - crc = crc32_be_zbc(s); - crc ^= crc_low; - - return crc; + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; } +EXPORT_SYMBOL(crc32_optimizations); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_be_unaligned(crc, p, head_len); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_be_prep(crc, p_ul); - crc = crc32_be_zbc(s); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_be_unaligned(crc, p, tail_len); - - return crc; - -legacy: - return crc32_be_base(crc, p, len); -} +MODULE_DESCRIPTION("RISC-V optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c new file mode 100644 index 000000000000..72fd67e7470c --- /dev/null +++ b/arch/riscv/lib/crc32_lsb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized least-significant-bit-first CRC32 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u32 crc_t; +#define LSB_CRC 1 +#include "crc-clmul-template.h" + +u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c new file mode 100644 index 000000000000..fdbeaccc369f --- /dev/null +++ b/arch/riscv/lib/crc32_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC32 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u32 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c new file mode 100644 index 000000000000..f0015a27836a --- /dev/null +++ b/arch/riscv/lib/crc64.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC64 functions + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc64.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc64_msb_clmul(crc, p, len, + &crc64_msb_0x42f0e1eba9ea3693_consts); + return crc64_be_generic(crc, p, len); +} +EXPORT_SYMBOL(crc64_be_arch); + +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc64_lsb_clmul(crc, p, len, + &crc64_lsb_0x9a6c9329ac4bc9b5_consts); + return crc64_nvme_generic(crc, p, len); +} +EXPORT_SYMBOL(crc64_nvme_arch); + +MODULE_DESCRIPTION("RISC-V optimized CRC64 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c new file mode 100644 index 000000000000..c5371bb85d90 --- /dev/null +++ b/arch/riscv/lib/crc64_lsb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized least-significant-bit-first CRC64 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u64 crc_t; +#define LSB_CRC 1 +#include "crc-clmul-template.h" + +u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c new file mode 100644 index 000000000000..1925d1dbe225 --- /dev/null +++ b/arch/riscv/lib/crc64_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC64 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u64 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a9f2b4af8f3f..0194324a0c50 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -22,6 +22,57 @@ #include "../kernel/head.h" +static void show_pte(unsigned long addr) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + struct mm_struct *mm = current->mm; + + if (!mm) + mm = &init_mm; + + pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n", + current->comm, PAGE_SIZE / SZ_1K, VA_BITS, + mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd)); + + pgdp = pgd_offset(mm, addr); + pgd = pgdp_get(pgdp); + pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd)); + if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd)) + goto out; + + p4dp = p4d_offset(pgdp, addr); + p4d = p4dp_get(p4dp); + pr_cont(", p4d=%016lx", p4d_val(p4d)); + if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d)) + goto out; + + pudp = pud_offset(p4dp, addr); + pud = pudp_get(pudp); + pr_cont(", pud=%016lx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud)) + goto out; + + pmdp = pmd_offset(pudp, addr); + pmd = pmdp_get(pmdp); + pr_cont(", pmd=%016lx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd)) + goto out; + + ptep = pte_offset_map(pmdp, addr); + if (!ptep) + goto out; + + pte = ptep_get(ptep); + pr_cont(", pte=%016lx", pte_val(pte)); + pte_unmap(ptep); +out: + pr_cont("\n"); +} + static void die_kernel_fault(const char *msg, unsigned long addr, struct pt_regs *regs) { @@ -31,6 +82,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, addr); bust_spinlocks(0); + show_pte(addr); die(regs, "Oops"); make_task_dead(SIGKILL); } diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 42314f093922..b4a78a4b35cf 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -293,7 +293,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t orig_pte = ptep_get(ptep); int pte_num; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fc53ce748c80..15b2eda4c364 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/sections.h> #include <asm/soc.h> +#include <asm/sparsemem.h> #include <asm/tlbflush.h> #include "../kernel/head.h" @@ -62,6 +63,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled); phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS) + +unsigned long vmemmap_start_pfn __ro_after_init; +EXPORT_SYMBOL(vmemmap_start_pfn); +#endif + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -240,8 +248,12 @@ static void __init setup_bootmem(void) * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. */ - if (!IS_ENABLED(CONFIG_XIP_KERNEL)) + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) { phys_ram_base = memblock_start_of_DRAM() & PMD_MASK; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT; +#endif + } /* * In 64-bit, any use of __va/__pa before this point is wrong as we @@ -256,8 +268,12 @@ static void __init setup_bootmem(void) */ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) { max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; - memblock_cap_memory_range(phys_ram_base, - max_mapped_addr - phys_ram_base); + if (memblock_end_of_DRAM() > max_mapped_addr) { + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + pr_warn("Physical memory overflows the linear mapping size: region above %pa removed", + &max_mapped_addr); + } } /* @@ -1101,6 +1117,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); phys_ram_base = CONFIG_PHYS_RAM_BASE; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT; +#endif kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); @@ -1558,7 +1577,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) return; } - pagetable_pte_dtor(ptdesc); + pagetable_dtor(ptdesc); if (PageReserved(page)) free_reserved_page(page); else @@ -1580,7 +1599,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemm } if (!is_vmemmap) - pagetable_pmd_dtor(ptdesc); + pagetable_dtor(ptdesc); if (PageReserved(page)) free_reserved_page(page); else diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index c301c8d291d2..41c635d6aca4 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -32,7 +32,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned pte_t *ptep, *p; if (pmd_none(pmdp_get(pmd))) { - p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); + p = memblock_alloc_or_panic(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -54,7 +54,7 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned unsigned long next; if (pud_none(pudp_get(pud))) { - p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + p = memblock_alloc_or_panic(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -85,7 +85,7 @@ static void __init kasan_populate_pud(p4d_t *p4d, unsigned long next; if (p4d_none(p4dp_get(p4d))) { - p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + p = memblock_alloc_or_panic(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -116,7 +116,7 @@ static void __init kasan_populate_p4d(pgd_t *pgd, unsigned long next; if (pgd_none(pgdp_get(pgd))) { - p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE); + p = memblock_alloc_or_panic(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE); set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -385,7 +385,7 @@ static void __init kasan_shallow_populate_pud(p4d_t *p4d, next = pud_addr_end(vaddr, end); if (pud_none(pudp_get(pud_k))) { - p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + p = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; } @@ -405,7 +405,7 @@ static void __init kasan_shallow_populate_p4d(pgd_t *pgd, next = p4d_addr_end(vaddr, end); if (p4d_none(p4dp_get(p4d_k))) { - p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + p = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; } @@ -424,7 +424,7 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long next = pgd_addr_end(vaddr, end); if (pgd_none(pgdp_get(pgd_k))) { - p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + p = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; } |