diff options
Diffstat (limited to 'arch/riscv')
41 files changed, 783 insertions, 462 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..10116f68569d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -25,6 +25,8 @@ config RISCV select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CRC32 if RISCV_ISA_ZBC + select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC + select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE @@ -53,7 +55,7 @@ config RISCV select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN - select ARCH_HAS_VDSO_TIME_DATA + select ARCH_HAS_VDSO_ARCH_DATA if GENERIC_VDSO_DATA_STORE select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if 64BIT && MMU select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX @@ -111,11 +113,13 @@ config RISCV select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED + select GENERIC_PENDING_IRQ if SMP select GENERIC_PCI_IOMAP select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select GENERIC_VDSO_DATA_STORE if MMU select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi index 1069134f2e12..a6dda55a2d1d 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi @@ -32,8 +32,9 @@ #interrupt-cells = <0x1>; #size-cells = <0x2>; device_type = "pci"; - reg = <0x30 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; + reg = <0x30 0x0 0x0 0x8000000>, <0x0 0x43008000 0x0 0x2000>, + <0x0 0x4300a000 0x0 0x2000>; + reg-names = "cfg", "bridge", "ctrl"; bus-range = <0x0 0x7f>; interrupt-parent = <&plic>; interrupts = <119>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi index 8230f06ddf48..36a9860f31da 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfs-fabric.dtsi @@ -20,8 +20,9 @@ #interrupt-cells = <0x1>; #size-cells = <0x2>; device_type = "pci"; - reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43008000 0x0 0x2000>, + <0x0 0x4300a000 0x0 0x2000>; + reg-names = "cfg", "bridge", "ctrl"; bus-range = <0x0 0x7f>; interrupt-parent = <&plic>; interrupts = <119>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi index 9a56de7b91d6..a57dca891965 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry-fabric.dtsi @@ -20,8 +20,9 @@ #interrupt-cells = <0x1>; #size-cells = <0x2>; device_type = "pci"; - reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>; - reg-names = "cfg", "apb"; + reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43008000 0x0 0x2000>, + <0x0 0x4300a000 0x0 0x2000>; + reg-names = "cfg", "bridge", "ctrl"; bus-range = <0x0 0x7f>; interrupt-parent = <&plic>; interrupts = <119>; diff --git a/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts b/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts index be596d01ff8d..34645a5f6038 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts +++ b/arch/riscv/boot/dts/sophgo/sg2042-milkv-pioneer.dts @@ -73,6 +73,13 @@ }; / { + pwmfan: pwm-fan { + compatible = "pwm-fan"; + cooling-levels = <103 128 179 230 255>; + pwms = <&pwm 0 40000 0>; + #cooling-cells = <2>; + }; + thermal-zones { soc-thermal { polling-delay-passive = <1000>; @@ -104,6 +111,28 @@ type = "hot"; }; }; + + cooling-maps { + map0 { + trip = <&soc_active1>; + cooling-device = <&pwmfan 0 1>; + }; + + map1 { + trip = <&soc_active2>; + cooling-device = <&pwmfan 1 2>; + }; + + map2 { + trip = <&soc_active3>; + cooling-device = <&pwmfan 2 3>; + }; + + map3 { + trip = <&soc_hot>; + cooling-device = <&pwmfan 3 4>; + }; + }; }; board-thermal { @@ -118,6 +147,13 @@ type = "active"; }; }; + + cooling-maps { + map4 { + trip = <&board_active>; + cooling-device = <&pwmfan 3 4>; + }; + }; }; }; }; diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index e62ac51ac55a..aa8b7fcc125d 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -165,6 +165,15 @@ }; }; + pwm: pwm@703000c000 { + compatible = "sophgo,sg2042-pwm"; + reg = <0x70 0x3000c000 0x0 0x20>; + #pwm-cells = <3>; + clocks = <&clkgen GATE_CLK_APB_PWM>; + clock-names = "apb"; + resets = <&rstgen RST_PWM>; + }; + pllclk: clock-controller@70300100c0 { compatible = "sophgo,sg2042-pll"; reg = <0x70 0x300100c0 0x0 0x40>; @@ -173,6 +182,16 @@ #clock-cells = <1>; }; + msi: msi-controller@7030010304 { + compatible = "sophgo,sg2042-msi"; + reg = <0x70 0x30010304 0x0 0x4>, + <0x70 0x30010300 0x0 0x4>; + reg-names = "clr", "doorbell"; + msi-controller; + #msi-cells = <0>; + msi-ranges = <&intc 64 IRQ_TYPE_LEVEL_HIGH 32>; + }; + rpgate: clock-controller@7030010368 { compatible = "sophgo,sg2042-rpgate"; reg = <0x70 0x30010368 0x0 0x98>; diff --git a/arch/riscv/boot/dts/spacemit/Makefile b/arch/riscv/boot/dts/spacemit/Makefile index ac617319a574..92e13ce1c16d 100644 --- a/arch/riscv/boot/dts/spacemit/Makefile +++ b/arch/riscv/boot/dts/spacemit/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_ARCH_SPACEMIT) += k1-bananapi-f3.dtb +dtb-$(CONFIG_ARCH_SPACEMIT) += k1-milkv-jupiter.dtb diff --git a/arch/riscv/boot/dts/spacemit/k1-milkv-jupiter.dts b/arch/riscv/boot/dts/spacemit/k1-milkv-jupiter.dts new file mode 100644 index 000000000000..448319214104 --- /dev/null +++ b/arch/riscv/boot/dts/spacemit/k1-milkv-jupiter.dts @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2024 Yangyu Chen <cyy@cyyself.name> + * Copyright (C) 2025 Javier Martinez Canillas <javierm@redhat.com> + */ + +#include "k1.dtsi" +#include "k1-pinctrl.dtsi" + +/ { + model = "Milk-V Jupiter (K1)"; + compatible = "milkv,jupiter", "spacemit,k1"; + + aliases { + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0"; + }; +}; + +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_2_cfg>; + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index 48fb5091b817..c2f70f5e2918 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -233,7 +233,7 @@ regulator-always-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1540000>; - regulator-name = "vdd-cpu"; + regulator-name = "vdd_cpu"; }; emmc_vdd: aldo4 { @@ -350,12 +350,6 @@ &spi0 { pinctrl-names = "default"; pinctrl-0 = <&spi0_pins>; - - spi_dev0: spi@0 { - compatible = "rohm,dh2228fv"; - reg = <0>; - spi-max-frequency = <10000000>; - }; }; &syscrg { diff --git a/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts b/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts index 30b0715196b6..8d9ce8b69a71 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-deepcomputing-fml13v01.dts @@ -11,6 +11,40 @@ compatible = "deepcomputing,fml13v01", "starfive,jh7110"; }; +&pcie1 { + perst-gpios = <&sysgpio 21 GPIO_ACTIVE_LOW>; + phys = <&pciephy1>; + pinctrl-names = "default"; + pinctrl-0 = <&pcie1_pins>; + status = "okay"; +}; + +&sysgpio { + pcie1_pins: pcie1-0 { + clkreq-pins { + pinmux = <GPIOMUX(29, GPOUT_LOW, + GPOEN_DISABLE, + GPI_NONE)>; + bias-pull-down; + drive-strength = <2>; + input-enable; + input-schmitt-disable; + slew-rate = <0>; + }; + + wake-pins { + pinmux = <GPIOMUX(28, GPOUT_HIGH, + GPOEN_DISABLE, + GPI_NONE)>; + bias-pull-up; + drive-strength = <2>; + input-enable; + input-schmitt-disable; + slew-rate = <0>; + }; + }; +}; + &usb0 { dr_mode = "host"; status = "okay"; diff --git a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts index b764d4d92fd9..31e825be2065 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts +++ b/arch/riscv/boot/dts/starfive/jh7110-pine64-star64.dts @@ -100,3 +100,8 @@ pinctrl-0 = <&usb0_pins>; status = "okay"; }; + +&usb_cdns3 { + phys = <&usbphy0>, <&pciephy0>; + phy-names = "cdns3,usb2-phy", "cdns3,usb3-phy"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi index 0d8339357bad..0ba74ef04679 100644 --- a/arch/riscv/boot/dts/starfive/jh7110.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -611,6 +611,8 @@ pciephy0: phy@10210000 { compatible = "starfive,jh7110-pcie-phy"; reg = <0x0 0x10210000 0x0 0x10000>; + starfive,sys-syscon = <&sys_syscon 0x18>; + starfive,stg-syscon = <&stg_syscon 0x148 0x1f4>; #phy-cells = <0>; }; @@ -1022,7 +1024,6 @@ snps,force_thresh_dma_mode; snps,axi-config = <&stmmac_axi_setup>; snps,tso; - snps,en-tx-lpi-clockgating; snps,txpbl = <16>; snps,rxpbl = <16>; starfive,syscon = <&aon_syscon 0xc 0x12>; @@ -1053,7 +1054,6 @@ snps,force_thresh_dma_mode; snps,axi-config = <&stmmac_axi_setup>; snps,tso; - snps,en-tx-lpi-clockgating; snps,txpbl = <16>; snps,rxpbl = <16>; starfive,syscon = <&sys_syscon 0x90 0x2>; diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index c6bd3d8354a9..49a0f48d93df 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -226,7 +226,7 @@ legacy: * @nr: Bit to set * @addr: Address to count from * - * This operation may be reordered on other architectures than x86. + * This is an atomic fully-ordered operation (implied full memory barrier). */ static __always_inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr) { @@ -238,7 +238,7 @@ static __always_inline int arch_test_and_set_bit(int nr, volatile unsigned long * @nr: Bit to clear * @addr: Address to count from * - * This operation can be reordered on other architectures other than x86. + * This is an atomic fully-ordered operation (implied full memory barrier). */ static __always_inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr) { diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 1c5c641075d2..0257f4aa7ff4 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -136,7 +136,7 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #include <asm-generic/io.h> #ifdef CONFIG_MMU -#define arch_memremap_wb(addr, size) \ +#define arch_memremap_wb(addr, size, flags) \ ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) #endif diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index cc33e35cd628..0e9c2fab6378 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -301,8 +301,6 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } -static inline void kvm_arch_sync_events(struct kvm *kvm) {} - #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index f891478829a5..c130d8100232 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -14,7 +14,7 @@ */ #ifdef CONFIG_MMU -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> diff --git a/arch/riscv/include/asm/vdso/time_data.h b/arch/riscv/include/asm/vdso/arch_data.h index dfa65228999b..da57a3786f7a 100644 --- a/arch/riscv/include/asm/vdso/time_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __RISCV_ASM_VDSO_TIME_DATA_H -#define __RISCV_ASM_VDSO_TIME_DATA_H +#ifndef __RISCV_ASM_VDSO_ARCH_DATA_H +#define __RISCV_ASM_VDSO_ARCH_DATA_H #include <linux/types.h> #include <vdso/datapage.h> #include <asm/hwprobe.h> -struct arch_vdso_time_data { +struct vdso_arch_data { /* Stash static answers to the hwprobe queries when all CPUs are selected. */ __u64 all_cpu_hwprobe_values[RISCV_HWPROBE_MAX_KEY + 1]; @@ -14,4 +14,4 @@ struct arch_vdso_time_data { __u8 homogeneous_cpus; }; -#endif /* __RISCV_ASM_VDSO_TIME_DATA_H */ +#endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index ba3283cf7acc..29164f84f93c 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -69,7 +69,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #endif /* CONFIG_GENERIC_TIME_VSYSCALL */ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { /* * The purpose of csr_read(CSR_TIME) is to trap the system into @@ -79,18 +79,6 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return csr_read(CSR_TIME); } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/include/asm/vdso/vsyscall.h b/arch/riscv/include/asm/vdso/vsyscall.h index e8a9c4b53c0c..1140b54b4bc8 100644 --- a/arch/riscv/include/asm/vdso/vsyscall.h +++ b/arch/riscv/include/asm/vdso/vsyscall.h @@ -6,15 +6,6 @@ #include <vdso/datapage.h> -extern struct vdso_data *vdso_data; - -static __always_inline struct vdso_data *__riscv_get_k_vdso_data(void) -{ - return vdso_data; -} - -#define __arch_get_k_vdso_data __riscv_get_k_vdso_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bcd3b816306c..04a4e5495512 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -450,8 +450,7 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, static int __init init_hwprobe_vdso_data(void) { - struct vdso_data *vd = __arch_get_k_vdso_data(); - struct arch_vdso_time_data *avd = &vd->arch_data; + struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 3ca3ae4277e1..cc2895d1fbc2 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -13,20 +13,11 @@ #include <linux/err.h> #include <asm/page.h> #include <asm/vdso.h> -#include <linux/time_namespace.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <vdso/vsyscall.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - -#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) - -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; +#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) struct __vdso_info { const char *name; @@ -79,78 +70,6 @@ static void __init __vdso_init(struct __vdso_info *vdso_info) vdso_info->cm->pages = vdso_pagelist; } -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -static const struct vm_special_mapping rv_vvar_map; - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &rv_vvar_map)) - zap_vma_pages(vma); - } - - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = sym_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = sym_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - -static const struct vm_special_mapping rv_vvar_map = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping rv_vdso_map __ro_after_init = { .name = "[vdso]", .mremap = vdso_mremap, @@ -196,7 +115,7 @@ static int __setup_additional_pages(struct mm_struct *mm, unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -208,8 +127,7 @@ static int __setup_additional_pages(struct mm_struct *mm, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), &rv_vvar_map); + ret = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(ret)) goto up_fail; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9a1b555e8733..ad73607abc28 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -2,7 +2,7 @@ # Copied from arch/tile/kernel/vdso/Makefile # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Symbols present in the vdso vdso-syms = rt_sigreturn ifdef CONFIG_64BIT diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index a158c029344f..2ddeba6c68dd 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -16,8 +16,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_time_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; bool all_cpus = !cpusetsize && !cpus; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; @@ -51,8 +50,7 @@ static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_time_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; unsigned char *c = (unsigned char *)cpus; diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index cbe2a179331d..8e86965a8aae 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -4,15 +4,14 @@ */ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_ARCH(riscv) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 1fa8be5ee509..4b24705dc63a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -172,8 +172,8 @@ module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { - kvm_riscv_teardown(); - kvm_exit(); + + kvm_riscv_teardown(); } module_exit(riscv_kvm_exit); diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f6d27b59c641..43ee8e33ba23 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -203,7 +203,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SVADE: /* * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. - * Svade is not allowed to disable when the platform use Svade. + * Svade can't be disabled unless we support Svadu. */ return arch_has_hw_pte_young(); default: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 2707a51b082c..78ac3216a54d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -666,6 +666,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba .type = etype, .size = sizeof(struct perf_event_attr), .pinned = true, + .disabled = true, /* * It should never reach here if the platform doesn't support the sscofpmf * extension as mode filtering won't work without it. diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 96e7a4e463f7..ff672fa71fcc 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -248,18 +248,19 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) if (t->init_done) return -EINVAL; - hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); t->init_done = true; t->next_set = false; /* Enable sstc for every vcpu if available in hardware */ if (riscv_isa_extension_available(NULL, SSTC)) { t->sstc_enabled = true; - t->hrt.function = kvm_riscv_vcpu_vstimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp; } else { t->sstc_enabled = false; - t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_hrtimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_hrtimer; } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 79368a895fee..b1c46153606a 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -16,6 +16,11 @@ lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o +crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o +obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o +crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o +obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o +crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h new file mode 100644 index 000000000000..8d73449235ef --- /dev/null +++ b/arch/riscv/lib/crc-clmul-consts.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CRC constants generated by: + * + * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 + * + * Do not edit manually. + */ + +struct crc_clmul_consts { + unsigned long fold_across_2_longs_const_hi; + unsigned long fold_across_2_longs_const_lo; + unsigned long barrett_reduction_const_1; + unsigned long barrett_reduction_const_2; +}; + +/* + * Constants generated for most-significant-bit-first CRC-16 using + * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */ + .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */ + .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */ +#else + .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */ + .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */ + .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */ + .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */ +#endif +}; + +/* + * Constants generated for most-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */ + .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */ +#else + .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */ + .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */ + .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */ +#endif +}; + +/* + * Constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + x^1 + x^0 + */ +static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */ + .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */ +#else + .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */ + .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */ + .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */ +#endif +}; + +/* + * Constants generated for least-significant-bit-first CRC-32 using + * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 + + * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 + */ +static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = { +#ifdef CONFIG_64BIT + .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */ + .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */ + .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */ +#else + .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */ + .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */ + .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */ + .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */ +#endif +}; + +/* + * Constants generated for most-significant-bit-first CRC-64 using + * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x^1 + x^0 + */ +#ifdef CONFIG_64BIT +static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = { + .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */ + .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */ + .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */ + .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */ +}; +#endif + +/* + * Constants generated for least-significant-bit-first CRC-64 using + * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + + * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + + * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + + * x^4 + x^3 + x^0 + */ +#ifdef CONFIG_64BIT +static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = { + .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */ + .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */ + .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */ + .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */ +}; +#endif diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h new file mode 100644 index 000000000000..77187e7f1762 --- /dev/null +++ b/arch/riscv/lib/crc-clmul-template.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2025 Google LLC */ + +/* + * This file is a "template" that generates a CRC function optimized using the + * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this + * file must define the following parameters to specify the type of CRC: + * + * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC + * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural + * mapping between bits and polynomial coefficients + * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected + * mapping between bits and polynomial coefficients + */ + +#include <asm/byteorder.h> +#include <linux/minmax.h> + +#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */ + +static inline unsigned long clmul(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmul %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +static inline unsigned long clmulh(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmulh %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +static inline unsigned long clmulr(unsigned long a, unsigned long b) +{ + unsigned long res; + + asm(".option push\n" + ".option arch,+zbc\n" + "clmulr %0, %1, %2\n" + ".option pop\n" + : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* + * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a + * polynomial whose bit order matches the CRC's bit order. + */ +#ifdef CONFIG_64BIT +# if LSB_CRC +# define crc_load_long(x) le64_to_cpup(x) +# else +# define crc_load_long(x) be64_to_cpup(x) +# endif +#else +# if LSB_CRC +# define crc_load_long(x) le32_to_cpup(x) +# else +# define crc_load_long(x) be32_to_cpup(x) +# endif +#endif + +/* XOR @crc into the end of @msgpoly that represents the high-order terms. */ +static inline unsigned long +crc_clmul_prep(crc_t crc, unsigned long msgpoly) +{ +#if LSB_CRC + return msgpoly ^ crc; +#else + return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS)); +#endif +} + +/* + * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it + * modulo the generator polynomial G. This gives the CRC of @msgpoly. + */ +static inline crc_t +crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts) +{ + unsigned long tmp; + + /* + * First step of Barrett reduction with integrated multiplication by + * x^n: calculate floor((msgpoly * x^n) / G). This is the value by + * which G needs to be multiplied to cancel out the x^n and higher terms + * of msgpoly * x^n. Do it using the following formula: + * + * msb-first: + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1)) + * lsb-first: + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG) + * + * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G), + * which fits a long exactly. Using any lower power of x there would + * not carry enough precision through the calculation, while using any + * higher power of x would require extra instructions to handle a wider + * multiplication. In the msb-first case, using this power of x results + * in needing a floored division by x^(BITS_PER_LONG-1), which matches + * what clmulr produces. In the lsb-first case, a factor of x gets + * implicitly introduced by each carryless multiplication (shown as + * '* x' above), and the floored division instead needs to be by + * x^BITS_PER_LONG which matches what clmul produces. + */ +#if LSB_CRC + tmp = clmul(msgpoly, consts->barrett_reduction_const_1); +#else + tmp = clmulr(msgpoly, consts->barrett_reduction_const_1); +#endif + + /* + * Second step of Barrett reduction: + * + * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G)) + * + * This reduces (msgpoly * x^n) modulo G by adding the appropriate + * multiple of G to it. The result uses only the x^0..x^(n-1) terms. + * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those + * terms in the first place, it is more efficient to do the equivalent: + * + * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n + * + * In the lsb-first case further modify it to the following which avoids + * a shift, as the crc ends up in the physically low n bits from clmulr: + * + * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x + * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n + * + * barrett_reduction_const_2 contains the constant multiplier (G - x^n) + * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The + * cast of the result to crc_t is essential, as it applies the mod x^n! + */ +#if LSB_CRC + return clmulr(tmp, consts->barrett_reduction_const_2); +#else + return clmul(tmp, consts->barrett_reduction_const_2); +#endif +} + +/* Update @crc with the data from @msgpoly. */ +static inline crc_t +crc_clmul_update_long(crc_t crc, unsigned long msgpoly, + const struct crc_clmul_consts *consts) +{ + return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts); +} + +/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */ +static inline crc_t +crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len, + const struct crc_clmul_consts *consts) +{ + unsigned long msgpoly; + size_t i; + +#if LSB_CRC + msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8); + for (i = 1; i < len; i++) + msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8)); +#else + msgpoly = p[0]; + for (i = 1; i < len; i++) + msgpoly = (msgpoly << 8) ^ p[i]; +#endif + + if (len >= sizeof(crc_t)) { + #if LSB_CRC + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); + #else + msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS); + #endif + return crc_clmul_long(msgpoly, consts); + } +#if LSB_CRC + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); + return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len)); +#else + msgpoly ^= crc >> (CRC_BITS - 8*len); + return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len)); +#endif +} + +static inline crc_t +crc_clmul(crc_t crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + size_t align; + + /* This implementation assumes that the CRC fits in an unsigned long. */ + BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long)); + + /* If the buffer is not long-aligned, align it. */ + align = (unsigned long)p % sizeof(unsigned long); + if (align && len) { + align = min(sizeof(unsigned long) - align, len); + crc = crc_clmul_update_partial(crc, p, align, consts); + p += align; + len -= align; + } + + if (len >= 4 * sizeof(unsigned long)) { + unsigned long m0, m1; + + m0 = crc_clmul_prep(crc, crc_load_long(p)); + m1 = crc_load_long(p + sizeof(unsigned long)); + p += 2 * sizeof(unsigned long); + len -= 2 * sizeof(unsigned long); + /* + * Main loop. Each iteration starts with a message polynomial + * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two + * more longs of data to form x^(3*BITS_PER_LONG)*m0 + + * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then + * "folds" that back into a congruent (modulo G) value that uses + * just m0 and m1 again. This is done by multiplying m0 by the + * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by + * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then + * adding the results to m2 and m3 as appropriate. Each such + * multiplication produces a result twice the length of a long, + * which in RISC-V is two instructions clmul and clmulh. + * + * This could be changed to fold across more than 2 longs at a + * time if there is a CPU that can take advantage of it. + */ + do { + unsigned long p0, p1, p2, p3; + + p0 = clmulh(m0, consts->fold_across_2_longs_const_hi); + p1 = clmul(m0, consts->fold_across_2_longs_const_hi); + p2 = clmulh(m1, consts->fold_across_2_longs_const_lo); + p3 = clmul(m1, consts->fold_across_2_longs_const_lo); + m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p); + m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^ + crc_load_long(p + sizeof(unsigned long)); + + p += 2 * sizeof(unsigned long); + len -= 2 * sizeof(unsigned long); + } while (len >= 2 * sizeof(unsigned long)); + + crc = crc_clmul_long(m0, consts); + crc = crc_clmul_update_long(crc, m1, consts); + } + + while (len >= sizeof(unsigned long)) { + crc = crc_clmul_update_long(crc, crc_load_long(p), consts); + p += sizeof(unsigned long); + len -= sizeof(unsigned long); + } + + if (len) + crc = crc_clmul_update_partial(crc, p, len, consts); + + return crc; +} diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h new file mode 100644 index 000000000000..dd1736245815 --- /dev/null +++ b/arch/riscv/lib/crc-clmul.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2025 Google LLC */ + +#ifndef _RISCV_CRC_CLMUL_H +#define _RISCV_CRC_CLMUL_H + +#include <linux/types.h> +#include "crc-clmul-consts.h" + +u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +#ifdef CONFIG_64BIT +u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts); +#endif + +#endif /* _RISCV_CRC_CLMUL_H */ diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c new file mode 100644 index 000000000000..e6b0051ccd86 --- /dev/null +++ b/arch/riscv/lib/crc-t10dif.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC-T10DIF function + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc-t10dif.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts); + return crc_t10dif_generic(crc, p, len); +} +EXPORT_SYMBOL(crc_t10dif_arch); + +MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c new file mode 100644 index 000000000000..554d295e95f5 --- /dev/null +++ b/arch/riscv/lib/crc16_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC16 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u16 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c deleted file mode 100644 index 53d56ab422c7..000000000000 --- a/arch/riscv/lib/crc32-riscv.c +++ /dev/null @@ -1,311 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32 implementation with Zbc extension. - * - * Copyright (C) 2024 Intel Corporation - */ - -#include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <asm/byteorder.h> - -#include <linux/types.h> -#include <linux/minmax.h> -#include <linux/crc32poly.h> -#include <linux/crc32.h> -#include <linux/byteorder/generic.h> -#include <linux/module.h> - -/* - * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for - * better understanding of how this math works. - * - * let "+" denotes polynomial add (XOR) - * let "-" denotes polynomial sub (XOR) - * let "*" denotes polynomial multiplication - * let "/" denotes polynomial floor division - * let "S" denotes source data, XLEN bit wide - * let "P" denotes CRC32 polynomial - * let "T" denotes 2^(XLEN+32) - * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit - * - * crc32(S, P) - * => S * (2^32) - S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) * (T / P) / T * P - * => lowest 32 bits of: S * (2^32) * quotient / T * P - * => lowest 32 bits of: S * quotient / 2^XLEN * P - * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P - * => clmul_low_part(clmul_high_part(S, QT) + S, P) - * - * In terms of below implementations, the BE case is more intuitive, since the - * higher order bit sits at more significant position. - */ - -#if __riscv_xlen == 64 -/* Slide by XLEN bits per iteration */ -# define STEP_ORDER 3 - -/* Each below polynomial quotient has an implicit bit for 2^XLEN */ - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */ -# define CRC32_POLY_QT_LE 0x5a72d812fb808b20 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */ -# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be - * the same as the bit-reversed version of CRC32_POLY_QT_LE - */ -# define CRC32_POLY_QT_BE 0x04d101df481b4e5a - -static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - "srli %0, %0, 32\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" ((u64)poly << 32) - :); - return crc; -} - -static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr); -} - -#elif __riscv_xlen == 32 -# define STEP_ORDER 2 -/* Each quotient should match the upper half of its analog in RV64 */ -# define CRC32_POLY_QT_LE 0xfb808b20 -# define CRC32C_POLY_QT_LE 0x6f5389f8 -# define CRC32_POLY_QT_BE 0x04d101df - -static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_le32(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" (poly) - :); - return crc; -} - -static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_be32(*ptr); -} - -#else -# error "Unexpected __riscv_xlen" -#endif - -static inline u32 crc32_be_zbc(unsigned long s) -{ - u32 crc; - - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmulh %0, %1, %2\n" - "xor %0, %0, %1\n" - "clmul %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (CRC32_POLY_QT_BE), - "r" (CRC32_POLY_BE) - :); - return crc; -} - -#define STEP (1 << STEP_ORDER) -#define OFFSET_MASK (STEP - 1) - -typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len); - -static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - for (int i = 0; i < len; i++) - s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8); - - s ^= (unsigned long)crc << (__riscv_xlen - bits); - if (__riscv_xlen == 32 || len < sizeof(u32)) - crc_low = crc >> bits; - - crc = crc32_le_zbc(s, poly, poly_qt); - crc ^= crc_low; - - return crc; -} - -static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt, - fallback crc_fb) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_le_prep(crc, p_ul); - crc = crc32_le_zbc(s, poly, poly_qt); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt); - - return crc; - -legacy: - return crc_fb(crc, p, len); -} - -u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, - crc32_le_base); -} -EXPORT_SYMBOL(crc32_le_arch); - -u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, crc32c_le_base); -} -EXPORT_SYMBOL(crc32c_le_arch); - -static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, - size_t len) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - s = 0; - for (int i = 0; i < len; i++) - s = *p++ | (s << 8); - - if (__riscv_xlen == 32 || len < sizeof(u32)) { - s ^= crc >> (32 - bits); - crc_low = crc << bits; - } else { - s ^= (unsigned long)crc << (bits - 32); - } - - crc = crc32_be_zbc(s); - crc ^= crc_low; - - return crc; -} - -u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_be_unaligned(crc, p, head_len); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_be_prep(crc, p_ul); - crc = crc32_be_zbc(s); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_be_unaligned(crc, p, tail_len); - - return crc; - -legacy: - return crc32_be_base(crc, p, len); -} -EXPORT_SYMBOL(crc32_be_arch); - -u32 crc32_optimizations(void) -{ - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) - return CRC32_LE_OPTIMIZATION | - CRC32_BE_OPTIMIZATION | - CRC32C_OPTIMIZATION; - return 0; -} -EXPORT_SYMBOL(crc32_optimizations); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c new file mode 100644 index 000000000000..a3188b7d9c40 --- /dev/null +++ b/arch/riscv/lib/crc32.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC32 functions + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc32.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_lsb_clmul(crc, p, len, + &crc32_lsb_0xedb88320_consts); + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_msb_clmul(crc, p, len, + &crc32_msb_0x04c11db7_consts); + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +u32 crc32c_arch(u32 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc32_lsb_clmul(crc, p, len, + &crc32_lsb_0x82f63b78_consts); + return crc32c_base(crc, p, len); +} +EXPORT_SYMBOL(crc32c_arch); + +u32 crc32_optimizations(void) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_DESCRIPTION("RISC-V optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c new file mode 100644 index 000000000000..72fd67e7470c --- /dev/null +++ b/arch/riscv/lib/crc32_lsb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized least-significant-bit-first CRC32 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u32 crc_t; +#define LSB_CRC 1 +#include "crc-clmul-template.h" + +u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c new file mode 100644 index 000000000000..fdbeaccc369f --- /dev/null +++ b/arch/riscv/lib/crc32_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC32 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u32 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c new file mode 100644 index 000000000000..f0015a27836a --- /dev/null +++ b/arch/riscv/lib/crc64.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized CRC64 functions + * + * Copyright 2025 Google LLC + */ + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <linux/crc64.h> +#include <linux/module.h> + +#include "crc-clmul.h" + +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc64_msb_clmul(crc, p, len, + &crc64_msb_0x42f0e1eba9ea3693_consts); + return crc64_be_generic(crc, p, len); +} +EXPORT_SYMBOL(crc64_be_arch); + +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return crc64_lsb_clmul(crc, p, len, + &crc64_lsb_0x9a6c9329ac4bc9b5_consts); + return crc64_nvme_generic(crc, p, len); +} +EXPORT_SYMBOL(crc64_nvme_arch); + +MODULE_DESCRIPTION("RISC-V optimized CRC64 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c new file mode 100644 index 000000000000..c5371bb85d90 --- /dev/null +++ b/arch/riscv/lib/crc64_lsb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized least-significant-bit-first CRC64 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u64 crc_t; +#define LSB_CRC 1 +#include "crc-clmul-template.h" + +u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c new file mode 100644 index 000000000000..1925d1dbe225 --- /dev/null +++ b/arch/riscv/lib/crc64_msb.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RISC-V optimized most-significant-bit-first CRC64 + * + * Copyright 2025 Google LLC + */ + +#include "crc-clmul.h" + +typedef u64 crc_t; +#define LSB_CRC 0 +#include "crc-clmul-template.h" + +u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, + const struct crc_clmul_consts *consts) +{ + return crc_clmul(crc, p, len, consts); +} |