summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-22 11:29:28 -0500
committerDavid S. Miller <davem@davemloft.net>2016-11-22 13:27:16 -0500
commitf9aa9dc7d2d00e6eb02168ffc64ef614b89d7998 (patch)
tree061b767ccf7d6955cc4fb921c230a787d194392e /arch
parent06b37b650cf826349677564cb0ff1560ed8e51fc (diff)
parent3b404a519815b9820f73f1ecf404e5546c9270ba (diff)
downloadlwn-f9aa9dc7d2d00e6eb02168ffc64ef614b89d7998.tar.gz
lwn-f9aa9dc7d2d00e6eb02168ffc64ef614b89d7998.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
All conflicts were simple overlapping changes except perhaps for the Thunder driver. That driver has a change_mtu method explicitly for sending a message to the hardware. If that fails it returns an error. Normally a driver doesn't need an ndo_change_mtu method because those are usually just range changes, which are now handled generically. But since this extra operation is needed in the Thunder driver, it has to stay. However, if the message send fails we have to restore the original MTU before the change because the entire call chain expects that if an error is thrown by ndo_change_mtu then the MTU did not change. Therefore code is added to nicvf_change_mtu to remember the original MTU, and to restore it upon nicvf_update_hw_max_frs() failure. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/boot/dts/imx53-qsb.dts14
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv.dtsi5
-rw-r--r--arch/arm/boot/dts/logicpd-torpedo-som.dtsi4
-rw-r--r--arch/arm/boot/dts/omap5-board-common.dtsi7
-rw-r--r--arch/arm/boot/dts/stih410-b2260.dts2
-rw-r--r--arch/arm/boot/dts/sun8i-a23-a33.dtsi4
-rw-r--r--arch/arm/kernel/traps.c20
-rw-r--r--arch/arm/kernel/vmlinux-xip.lds.S5
-rw-r--r--arch/arm/lib/backtrace.S37
-rw-r--r--arch/arm/mach-omap2/Kconfig1
-rw-r--r--arch/arm/mach-omap2/id.c16
-rw-r--r--arch/arm/mach-omap2/prm3xxx.c3
-rw-r--r--arch/arm/mach-omap2/voltage.c6
-rw-r--r--arch/arm/mm/dma-mapping.c2
-rw-r--r--arch/arm/mm/proc-v7m.S2
-rw-r--r--arch/arm64/boot/dts/marvell/armada-37xx.dtsi4
-rw-r--r--arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi6
-rw-r--r--arch/arm64/include/asm/perf_event.h10
-rw-r--r--arch/arm64/kernel/perf_event.c10
-rw-r--r--arch/arm64/kvm/sys_regs.c10
-rw-r--r--arch/powerpc/include/asm/exception-64s.h15
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S11
-rw-r--r--arch/powerpc/kernel/process.c42
-rw-r--r--arch/powerpc/kernel/setup_64.c20
-rw-r--r--arch/powerpc/mm/hash_utils_64.c4
-rw-r--r--arch/powerpc/mm/pgtable-radix.c4
-rw-r--r--arch/powerpc/mm/tlb-radix.c4
-rw-r--r--arch/sparc/Kconfig23
-rw-r--r--arch/sparc/include/asm/hypervisor.h343
-rw-r--r--arch/sparc/include/asm/iommu_64.h28
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/iommu.c8
-rw-r--r--arch/sparc/kernel/iommu_common.h1
-rw-r--r--arch/sparc/kernel/pci_sun4v.c418
-rw-r--r--arch/sparc/kernel/pci_sun4v.h21
-rw-r--r--arch/sparc/kernel/pci_sun4v_asm.S68
-rw-r--r--arch/sparc/kernel/signal_32.c4
-rw-r--r--arch/sparc/mm/init_64.c71
-rw-r--r--arch/x86/kvm/irq_comm.c58
-rw-r--r--arch/x86/kvm/x86.c47
-rw-r--r--arch/x86/purgatory/Makefile1
-rw-r--r--arch/xtensa/include/uapi/asm/unistd.h9
-rw-r--r--arch/xtensa/kernel/time.c14
-rw-r--r--arch/xtensa/kernel/traps.c74
45 files changed, 1180 insertions, 278 deletions
diff --git a/arch/arm/boot/dts/imx53-qsb.dts b/arch/arm/boot/dts/imx53-qsb.dts
index dec4b073ceb1..379939699164 100644
--- a/arch/arm/boot/dts/imx53-qsb.dts
+++ b/arch/arm/boot/dts/imx53-qsb.dts
@@ -64,8 +64,8 @@
};
ldo3_reg: ldo3 {
- regulator-min-microvolt = <600000>;
- regulator-max-microvolt = <1800000>;
+ regulator-min-microvolt = <1725000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
@@ -76,8 +76,8 @@
};
ldo5_reg: ldo5 {
- regulator-min-microvolt = <1725000>;
- regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3600000>;
regulator-always-on;
};
@@ -100,14 +100,14 @@
};
ldo9_reg: ldo9 {
- regulator-min-microvolt = <1200000>;
+ regulator-min-microvolt = <1250000>;
regulator-max-microvolt = <3600000>;
regulator-always-on;
};
ldo10_reg: ldo10 {
- regulator-min-microvolt = <1250000>;
- regulator-max-microvolt = <3650000>;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3600000>;
regulator-always-on;
};
};
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 0ff1c2de95bf..26cce4d18405 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -13,6 +13,11 @@
};
};
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0>;
+ };
+
wl12xx_vmmc: wl12xx_vmmc {
compatible = "regulator-fixed";
regulator-name = "vwl1271";
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 731ec37aed5b..8f9a69ca818c 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -13,9 +13,9 @@
};
};
- memory@0 {
+ memory@80000000 {
device_type = "memory";
- reg = <0 0>;
+ reg = <0x80000000 0>;
};
leds {
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 6365635fea5c..4caadb253249 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -124,6 +124,7 @@
compatible = "ti,abe-twl6040";
ti,model = "omap5-uevm";
+ ti,jack-detection;
ti,mclk-freq = <19200000>;
ti,mcpdm = <&mcpdm>;
@@ -415,7 +416,7 @@
ti,backup-battery-charge-high-current;
};
- gpadc {
+ gpadc: gpadc {
compatible = "ti,palmas-gpadc";
interrupts = <18 0
16 0
@@ -475,8 +476,8 @@
smps6_reg: smps6 {
/* VDD_DDR3 - over VDD_SMPS6 */
regulator-name = "smps6";
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
regulator-always-on;
regulator-boot-on;
};
diff --git a/arch/arm/boot/dts/stih410-b2260.dts b/arch/arm/boot/dts/stih410-b2260.dts
index ef2ff2f518f6..7fb507fcba7e 100644
--- a/arch/arm/boot/dts/stih410-b2260.dts
+++ b/arch/arm/boot/dts/stih410-b2260.dts
@@ -74,7 +74,7 @@
/* Low speed expansion connector */
spi0: spi@9844000 {
label = "LS-SPI0";
- cs-gpio = <&pio30 3 0>;
+ cs-gpios = <&pio30 3 0>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
index 48fc24f36fcb..300a1bd5a6ec 100644
--- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
@@ -282,11 +282,15 @@
uart1_pins_a: uart1@0 {
allwinner,pins = "PG6", "PG7";
allwinner,function = "uart1";
+ allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+ allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
};
uart1_pins_cts_rts_a: uart1-cts-rts@0 {
allwinner,pins = "PG8", "PG9";
allwinner,function = "uart1";
+ allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+ allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
};
mmc0_pins_a: mmc0@0 {
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index bc698383e822..9688ec0c6ef4 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -74,6 +74,26 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long
dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs));
}
+void dump_backtrace_stm(u32 *stack, u32 instruction)
+{
+ char str[80], *p;
+ unsigned int x;
+ int reg;
+
+ for (reg = 10, x = 0, p = str; reg >= 0; reg--) {
+ if (instruction & BIT(reg)) {
+ p += sprintf(p, " r%d:%08x", reg, *stack--);
+ if (++x == 6) {
+ x = 0;
+ p = str;
+ printk("%s\n", str);
+ }
+ }
+ }
+ if (p != str)
+ printk("%s\n", str);
+}
+
#ifndef CONFIG_ARM_UNWIND
/*
* Stack pointers should always be within the kernels view of
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 7fa487ef7e2f..37b2a11af345 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -3,6 +3,9 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
*/
+/* No __ro_after_init data in the .rodata section - which will always be ro */
+#define RO_AFTER_INIT_DATA
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
@@ -223,6 +226,8 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__init_end = .;
+ *(.data..ro_after_init)
+
NOSAVE_DATA
CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
READ_MOSTLY_DATA(L1_CACHE_BYTES)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index fab5a50503ae..7d7952e5a3b1 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -10,6 +10,7 @@
* 27/03/03 Ian Molton Clean up CONFIG_CPU
*
*/
+#include <linux/kern_levels.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
@@ -83,13 +84,13 @@ for_each_frame: tst frame, mask @ Check for address exceptions
teq r3, r1, lsr #11
ldreq r0, [frame, #-8] @ get sp
subeq r0, r0, #4 @ point at the last arg
- bleq .Ldumpstm @ dump saved registers
+ bleq dump_backtrace_stm @ dump saved registers
1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc}
ldr r3, .Ldsi @ instruction exists,
teq r3, r1, lsr #11
subeq r0, frame, #16
- bleq .Ldumpstm @ dump saved registers
+ bleq dump_backtrace_stm @ dump saved registers
teq sv_fp, #0 @ zero saved fp means
beq no_frame @ no further frames
@@ -112,38 +113,6 @@ ENDPROC(c_backtrace)
.long 1004b, 1006b
.popsection
-#define instr r4
-#define reg r5
-#define stack r6
-
-.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr}
- mov stack, r0
- mov instr, r1
- mov reg, #10
- mov r7, #0
-1: mov r3, #1
- ARM( tst instr, r3, lsl reg )
- THUMB( lsl r3, reg )
- THUMB( tst instr, r3 )
- beq 2f
- add r7, r7, #1
- teq r7, #6
- moveq r7, #0
- adr r3, .Lcr
- addne r3, r3, #1 @ skip newline
- ldr r2, [stack], #-4
- mov r1, reg
- adr r0, .Lfp
- bl printk
-2: subs reg, reg, #1
- bpl 1b
- teq r7, #0
- adrne r0, .Lcr
- blne printk
- ldmfd sp!, {instr, reg, stack, r7, pc}
-
-.Lfp: .asciz " r%d:%08x%s"
-.Lcr: .asciz "\n"
.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n"
.align
.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc}
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index a9afeebd59f2..0465338183c7 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -71,6 +71,7 @@ config SOC_AM43XX
select HAVE_ARM_TWD
select ARM_ERRATA_754322
select ARM_ERRATA_775420
+ select OMAP_INTERCONNECT
config SOC_DRA7XX
bool "TI DRA7XX"
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 2abd53ae3e7a..cc6d9fa60924 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -205,11 +205,15 @@ void __init omap2xxx_check_revision(void)
#define OMAP3_SHOW_FEATURE(feat) \
if (omap3_has_ ##feat()) \
- printk(#feat" ");
+ n += scnprintf(buf + n, sizeof(buf) - n, #feat " ");
static void __init omap3_cpuinfo(void)
{
const char *cpu_name;
+ char buf[64];
+ int n = 0;
+
+ memset(buf, 0, sizeof(buf));
/*
* OMAP3430 and OMAP3530 are assumed to be same.
@@ -241,10 +245,10 @@ static void __init omap3_cpuinfo(void)
cpu_name = "OMAP3503";
}
- sprintf(soc_name, "%s", cpu_name);
+ scnprintf(soc_name, sizeof(soc_name), "%s", cpu_name);
/* Print verbose information */
- pr_info("%s %s (", soc_name, soc_rev);
+ n += scnprintf(buf, sizeof(buf) - n, "%s %s (", soc_name, soc_rev);
OMAP3_SHOW_FEATURE(l2cache);
OMAP3_SHOW_FEATURE(iva);
@@ -252,8 +256,10 @@ static void __init omap3_cpuinfo(void)
OMAP3_SHOW_FEATURE(neon);
OMAP3_SHOW_FEATURE(isp);
OMAP3_SHOW_FEATURE(192mhz_clk);
-
- printk(")\n");
+ if (*(buf + n - 1) == ' ')
+ n--;
+ n += scnprintf(buf + n, sizeof(buf) - n, ")\n");
+ pr_info("%s", buf);
}
#define OMAP3_CHECK_FEATURE(status,feat) \
diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c
index 62680aad2126..718981bb80cd 100644
--- a/arch/arm/mach-omap2/prm3xxx.c
+++ b/arch/arm/mach-omap2/prm3xxx.c
@@ -319,6 +319,9 @@ void __init omap3_prm_init_pm(bool has_uart4, bool has_iva)
if (has_uart4) {
en_uart4_mask = OMAP3630_EN_UART4_MASK;
grpsel_uart4_mask = OMAP3630_GRPSEL_UART4_MASK;
+ } else {
+ en_uart4_mask = 0;
+ grpsel_uart4_mask = 0;
}
/* Enable wakeups in PER */
diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c
index cba8cada8c81..cd15dbd62671 100644
--- a/arch/arm/mach-omap2/voltage.c
+++ b/arch/arm/mach-omap2/voltage.c
@@ -87,6 +87,12 @@ int voltdm_scale(struct voltagedomain *voltdm,
return -ENODATA;
}
+ if (!voltdm->volt_data) {
+ pr_err("%s: No voltage data defined for vdd_%s\n",
+ __func__, voltdm->name);
+ return -ENODATA;
+ }
+
/* Adjust voltage to the exact voltage from the OPP table */
for (i = 0; voltdm->volt_data[i].volt_nominal != 0; i++) {
if (voltdm->volt_data[i].volt_nominal >= target_volt) {
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ab4f74536057..ab7710002ba6 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1167,7 +1167,7 @@ static int __init dma_debug_do_init(void)
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
return 0;
}
-fs_initcall(dma_debug_do_init);
+core_initcall(dma_debug_do_init);
#ifdef CONFIG_ARM_DMA_USE_IOMMU
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index f6d333f09bfe..8dea61640cc1 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -96,7 +96,7 @@ ENTRY(cpu_cm7_proc_fin)
ret lr
ENDPROC(cpu_cm7_proc_fin)
- .section ".text.init", #alloc, #execinstr
+ .section ".init.text", #alloc, #execinstr
__v7m_cm7_setup:
mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP)
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index c4762538ec01..e9bd58793464 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -105,7 +105,7 @@
status = "disabled";
};
- nb_perih_clk: nb-periph-clk@13000{
+ nb_periph_clk: nb-periph-clk@13000 {
compatible = "marvell,armada-3700-periph-clock-nb";
reg = <0x13000 0x100>;
clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
@@ -113,7 +113,7 @@
#clock-cells = <1>;
};
- sb_perih_clk: sb-periph-clk@18000{
+ sb_periph_clk: sb-periph-clk@18000 {
compatible = "marvell,armada-3700-periph-clock-sb";
reg = <0x18000 0x100>;
clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 842fb333285c..6bf9e241179b 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -130,8 +130,8 @@
reg = <0x700600 0x50>;
#address-cells = <0x1>;
#size-cells = <0x0>;
- cell-index = <1>;
- clocks = <&cps_syscon0 0 3>;
+ cell-index = <3>;
+ clocks = <&cps_syscon0 1 21>;
status = "disabled";
};
@@ -140,7 +140,7 @@
reg = <0x700680 0x50>;
#address-cells = <1>;
#size-cells = <0>;
- cell-index = <2>;
+ cell-index = <4>;
clocks = <&cps_syscon0 1 21>;
status = "disabled";
};
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 2065f46fa740..38b6a2b49d68 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -46,7 +46,15 @@
#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
+/*
+ * PMUv3 event types: required events
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
/*
* Event filters for PMUv3
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a9310a69fffd..57ae9d9ed9bb 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -31,17 +31,9 @@
/*
* ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
+ * Common event types (some are defined in asm/perf_event.h).
*/
-/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
-
/* At least one of the following is required. */
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f302fdb3a030..87e7e6608cd8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
idx = ARMV8_PMU_CYCLE_IDX;
} else {
- BUG();
+ return false;
}
+ } else if (r->CRn == 0 && r->CRm == 9) {
+ /* PMCCNTR */
+ if (pmu_access_event_counter_el0_disabled(vcpu))
+ return false;
+
+ idx = ARMV8_PMU_CYCLE_IDX;
} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
/* PMEVCNTRn_EL0 */
if (pmu_access_event_counter_el0_disabled(vcpu))
@@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
} else {
- BUG();
+ return false;
}
if (!pmu_counter_idx_valid(vcpu, idx))
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 84d49b197c32..9a3eee661297 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -91,7 +91,7 @@
*/
#define LOAD_HANDLER(reg, label) \
ld reg,PACAKBASE(r13); /* get high part of &label */ \
- ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l;
+ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label);
#define __LOAD_HANDLER(reg, label) \
ld reg,PACAKBASE(r13); \
@@ -158,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943) \
std ra,offset(r13); \
END_FTR_SECTION_NESTED(ftr,ftr,943)
-#define EXCEPTION_PROLOG_0(area) \
- GET_PACA(r13); \
+#define EXCEPTION_PROLOG_0_PACA(area) \
std r9,area+EX_R9(r13); /* save r9 */ \
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
HMT_MEDIUM; \
std r10,area+EX_R10(r13); /* save r10 - r12 */ \
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+#define EXCEPTION_PROLOG_0(area) \
+ GET_PACA(r13); \
+ EXCEPTION_PROLOG_0_PACA(area)
+
#define __EXCEPTION_PROLOG_1(area, extra, vec) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
@@ -196,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXCEPTION_PROLOG_1(area, extra, vec); \
EXCEPTION_PROLOG_PSERIES_1(label, h);
+/* Have the PACA in r13 already */
+#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec) \
+ EXCEPTION_PROLOG_0_PACA(area); \
+ EXCEPTION_PROLOG_1(area, extra, vec); \
+ EXCEPTION_PROLOG_PSERIES_1(label, h);
+
#define __KVMTEST(h, n) \
lbz r10,HSTATE_IN_GUEST(r13); \
cmpwi r10,0; \
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831b3081..c56ea8c84abb 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -460,5 +460,6 @@
#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \
((IH & 0x7) << 21))
+#define PPC_INVALIDATE_ERAT PPC_SLBIA(7)
#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 08ba447a4b3d..1ba82ea90230 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,7 +116,9 @@ EXC_VIRT_NONE(0x4000, 0x4100)
EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ GET_PACA(r13)
+ clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
+ EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
IDLETEST, 0x100)
EXC_REAL_END(system_reset, 0x100, 0x200)
@@ -124,6 +126,9 @@ EXC_VIRT_NONE(0x4100, 0x4200)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
+BEGIN_FTR_SECTION
+ GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl pnv_restore_hyp_resource
li r0,PNV_THREAD_RUNNING
@@ -169,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300)
SET_SCRATCH0(r13) /* save r13 */
/*
* Running native on arch 2.06 or later, we may wakeup from winkle
- * inside machine check. If yes, then last bit of HSPGR0 would be set
+ * inside machine check. If yes, then last bit of HSPRG0 would be set
* to 1. Hence clear it unconditionally.
*/
GET_PACA(r13)
@@ -388,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
/*
* Go back to winkle. Please note that this thread was woken up in
* machine check from winkle and have not restored the per-subcore
- * state. Hence before going back to winkle, set last bit of HSPGR0
+ * state. Hence before going back to winkle, set last bit of HSPRG0
* to 1. This will make sure that if this thread gets woken up
* again at reset vector 0x100 then it will get chance to restore
* the subcore state.
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ce6dc61b15b2..49a680d5ae37 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1215,7 +1215,7 @@ static void show_instructions(struct pt_regs *regs)
int instr;
if (!(i % 8))
- printk("\n");
+ pr_cont("\n");
#if !defined(CONFIG_BOOKE)
/* If executing with the IMMU off, adjust pc rather
@@ -1227,18 +1227,18 @@ static void show_instructions(struct pt_regs *regs)
if (!__kernel_text_address(pc) ||
probe_kernel_address((unsigned int __user *)pc, instr)) {
- printk(KERN_CONT "XXXXXXXX ");
+ pr_cont("XXXXXXXX ");
} else {
if (regs->nip == pc)
- printk(KERN_CONT "<%08x> ", instr);
+ pr_cont("<%08x> ", instr);
else
- printk(KERN_CONT "%08x ", instr);
+ pr_cont("%08x ", instr);
}
pc += sizeof(int);
}
- printk("\n");
+ pr_cont("\n");
}
struct regbit {
@@ -1282,7 +1282,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep)
for (; bits->bit; ++bits)
if (val & bits->bit) {
- printk("%s%s", s, bits->name);
+ pr_cont("%s%s", s, bits->name);
s = sep;
}
}
@@ -1305,9 +1305,9 @@ static void print_tm_bits(unsigned long val)
* T: Transactional (bit 34)
*/
if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
- printk(",TM[");
+ pr_cont(",TM[");
print_bits(val, msr_tm_bits, "");
- printk("]");
+ pr_cont("]");
}
}
#else
@@ -1316,10 +1316,10 @@ static void print_tm_bits(unsigned long val) {}
static void print_msr_bits(unsigned long val)
{
- printk("<");
+ pr_cont("<");
print_bits(val, msr_bits, ",");
print_tm_bits(val);
- printk(">");
+ pr_cont(">");
}
#ifdef CONFIG_PPC64
@@ -1347,29 +1347,29 @@ void show_regs(struct pt_regs * regs)
printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
- printk("CFAR: "REG" ", regs->orig_gpr3);
+ pr_cont("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+ pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
#else
- printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+ pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
#endif
#ifdef CONFIG_PPC64
- printk("SOFTE: %ld ", regs->softe);
+ pr_cont("SOFTE: %ld ", regs->softe);
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr))
- printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+ pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
#endif
for (i = 0; i < 32; i++) {
if ((i % REGS_PER_LINE) == 0)
- printk("\nGPR%02d: ", i);
- printk(REG " ", regs->gpr[i]);
+ pr_cont("\nGPR%02d: ", i);
+ pr_cont(REG " ", regs->gpr[i]);
if (i == LAST_VOLATILE && !FULL_REGS(regs))
break;
}
- printk("\n");
+ pr_cont("\n");
#ifdef CONFIG_KALLSYMS
/*
* Lookup NIP late so we have the best change of getting the
@@ -1900,14 +1900,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if ((ip == rth) && curr_frame >= 0) {
- printk(" (%pS)",
+ pr_cont(" (%pS)",
(void *)current->ret_stack[curr_frame].ret);
curr_frame--;
}
#endif
if (firstframe)
- printk(" (unreliable)");
- printk("\n");
+ pr_cont(" (unreliable)");
+ pr_cont("\n");
}
firstframe = 0;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7ac8e6eaab5b..8d586cff8a41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -226,17 +226,25 @@ static void __init configure_exceptions(void)
if (firmware_has_feature(FW_FEATURE_OPAL))
opal_configure_cores();
- /* Enable AIL if supported, and we are in hypervisor mode */
- if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
- early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
- unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
- }
+ /* AIL on native is done in cpu_ready_for_interrupts() */
}
}
static void cpu_ready_for_interrupts(void)
{
+ /*
+ * Enable AIL if supported, and we are in hypervisor mode. This
+ * is called once for every processor.
+ *
+ * If we are not in hypervisor mode the job is done once for
+ * the whole partition in configure_exceptions().
+ */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+ early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ unsigned long lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ }
+
/* Set IR and DR in PACA MSR */
get_paca()->kernel_msr = MSR_KERNEL;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 44d3c3a38e3e..5503078090cd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1029,6 +1029,10 @@ void hash__early_init_mmu_secondary(void)
{
/* Initialize hash table for that CPU */
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_hash();
+
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ed7bddc456b7..688b54517655 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -388,6 +388,10 @@ void radix__early_init_mmu_secondary(void)
* update partition table control register and UPRT
*/
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_radix();
+
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index bda8c43be78a..3493cf4e0452 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
__tlbiel_pid(pid, set, ric);
}
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
return;
}
@@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index b23c76b42d6e..165ecdd24d22 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select HAVE_ARCH_HARDENED_USERCOPY
+ select PROVE_LOCKING_SMALL if PROVE_LOCKING
config SPARC32
def_bool !64BIT
@@ -89,6 +90,14 @@ config ARCH_DEFCONFIG
config ARCH_PROC_KCORE_TEXT
def_bool y
+config ARCH_ATU
+ bool
+ default y if SPARC64
+
+config ARCH_DMA_ADDR_T_64BIT
+ bool
+ default y if ARCH_ATU
+
config IOMMU_HELPER
bool
default y if SPARC64
@@ -304,6 +313,20 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y if SPARC64
+config FORCE_MAX_ZONEORDER
+ int "Maximum zone order"
+ default "13"
+ help
+ The kernel memory allocator divides physically contiguous memory
+ blocks into "zones", where each zone is a power of two number of
+ pages. This option selects the largest power of two that the kernel
+ keeps in the memory allocator. If you need to allocate very large
+ blocks of physically contiguous memory, then you may need to
+ increase this value.
+
+ This config option is actually maximum order plus one. For example,
+ a value of 13 means that the largest free memory block is 2^12 pages.
+
source "mm/Kconfig"
if SPARC64
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 666d5ba230d2..73cb8978df58 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2335,6 +2335,348 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
*/
#define HV_FAST_PCI_MSG_SETVALID 0xd3
+/* PCI IOMMU v2 definitions and services
+ *
+ * While the PCI IO definitions above are valid, IOMMU v2 adds new PCI IO
+ * definitions and services.
+ *
+ * CTE Clump Table Entry. First level table entry in the ATU.
+ *
+ * pci_device_list
+ * A 32-bit aligned list of pci_devices.
+ *
+ * pci_device_listp
+ * real address of a pci_device_list. 32-bit aligned.
+ *
+ * iotte IOMMU translation table entry.
+ *
+ * iotte_attributes
+ * IO Attributes for IOMMU v2 mappings. In addition to
+ * read and write, IOMMU v2 supports relaxed ordering.
+ *
+ * io_page_list A 64-bit aligned list of real addresses. Each real
+ * address in an io_page_list must be properly aligned
+ * to the pagesize of the given IOTSB.
+ *
+ * io_page_list_p Real address of an io_page_list, 64-bit aligned.
+ *
+ * IOTSB IO Translation Storage Buffer. An aligned table of
+ * IOTTEs. Each IOTSB has a pagesize, table size, and
+ * virtual address associated with it that must match
+ * a pagesize and table size supported by the underlying
+ * hardware implementation. The alignment requirements
+ * for an IOTSB depend on the pagesize used for that IOTSB.
+ * Each IOTTE in an IOTSB maps one pagesize-sized page.
+ * The size of the IOTSB dictates how large of a virtual
+ * address space the IOTSB is capable of mapping.
+ *
+ * iotsb_handle An opaque identifier for an IOTSB. A devhandle plus
+ * iotsb_handle represents a binding of an IOTSB to a
+ * PCI root complex.
+ *
+ * iotsb_index Zero-based IOTTE number within an IOTSB.
+ */
+
+/* The index_count argument consists of two fields:
+ * bits 63:48 #iottes and bits 47:0 iotsb_index
+ */
+#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \
+ (((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index)))
+
+/* pci_iotsb_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_CONF
+ * ARG0: devhandle
+ * ARG1: r_addr
+ * ARG2: size
+ * ARG3: pagesize
+ * ARG4: iova
+ * RET0: status
+ * RET1: iotsb_handle
+ * ERRORS: EINVAL Invalid devhandle, size, iova, or pagesize
+ * EBADALIGN r_addr is not properly aligned
+ * ENORADDR r_addr is not a valid real address
+ * ETOOMANY No further IOTSBs may be configured
+ * EBUSY Duplicate devhandle, r_addr, iova combination
+ *
+ * Create an IOTSB suitable for the PCI root complex identified by devhandle,
+ * for the DMA virtual address defined by the argument iova.
+ *
+ * r_addr is the properly aligned base address of the IOTSB and size is the
+ * IOTSB (table) size in bytes. The IOTSB is required to be zeroed prior to
+ * being configured. If it contains any values other than zeros then the
+ * behavior is undefined.
+ *
+ * pagesize is the size of each page in the IOTSB. Note that the combination of
+ * size (table size) and pagesize must be valid.
+ *
+ * iova is the DMA virtual address this IOTSB will map.
+ *
+ * If successful, the opaque 64-bit handle iotsb_handle is returned in ret1.
+ * Once configured, privileged access to the IOTSB memory is prohibited and
+ * creates undefined behavior. The only permitted access is indirect via these
+ * services.
+ */
+#define HV_FAST_PCI_IOTSB_CONF 0x190
+
+/* pci_iotsb_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_INFO
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * RET0: status
+ * RET1: r_addr
+ * RET2: size
+ * RET3: pagesize
+ * RET4: iova
+ * RET5: #bound
+ * ERRORS: EINVAL Invalid devhandle or iotsb_handle
+ *
+ * This service returns configuration information about an IOTSB previously
+ * created with pci_iotsb_conf.
+ *
+ * iotsb_handle value 0 may be used with this service to inquire about the
+ * legacy IOTSB that may or may not exist. If the service succeeds, the return
+ * values describe the legacy IOTSB and I/O virtual addresses mapped by that
+ * table. However, the table base address r_addr may contain the value -1 which
+ * indicates a memory range that cannot be accessed or be reclaimed.
+ *
+ * The return value #bound contains the number of PCI devices that iotsb_handle
+ * is currently bound to.
+ */
+#define HV_FAST_PCI_IOTSB_INFO 0x191
+
+/* pci_iotsb_unconf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_UNCONF
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or iotsb_handle
+ * EBUSY The IOTSB is bound and may not be unconfigured
+ *
+ * This service unconfigures the IOTSB identified by the devhandle and
+ * iotsb_handle arguments, previously created with pci_iotsb_conf.
+ * The IOTSB must not be currently bound to any device or the service will fail
+ *
+ * If the call succeeds, iotsb_handle is no longer valid.
+ */
+#define HV_FAST_PCI_IOTSB_UNCONF 0x192
+
+/* pci_iotsb_bind()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_BIND
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: pci_device
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device
+ * EBUSY A PCI function is already bound to an IOTSB at the same
+ * address range as specified by devhandle, iotsb_handle.
+ *
+ * This service binds the PCI function specified by the argument pci_device to
+ * the IOTSB specified by the arguments devhandle and iotsb_handle.
+ *
+ * The PCI device function is bound to the specified IOTSB with the IOVA range
+ * specified when the IOTSB was configured via pci_iotsb_conf. If the function
+ * is already bound then it is unbound first.
+ */
+#define HV_FAST_PCI_IOTSB_BIND 0x193
+
+/* pci_iotsb_unbind()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_UNBIND
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: pci_device
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device
+ * ENOMAP The PCI function was not bound to the specified IOTSB
+ *
+ * This service unbinds the PCI device specified by the argument pci_device
+ * from the IOTSB identified by the arguments devhandle and iotsb_handle.
+ *
+ * If the PCI device is not bound to the specified IOTSB then this service will
+ * fail with status ENOMAP
+ */
+#define HV_FAST_PCI_IOTSB_UNBIND 0x194
+
+/* pci_iotsb_get_binding()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_GET_BINDING
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iova
+ * RET0: status
+ * RET1: iotsb_handle
+ * ERRORS: EINVAL Invalid devhandle, pci_device, or iova
+ * ENOMAP The PCI function is not bound to an IOTSB at iova
+ *
+ * This service returns the IOTSB binding, iotsb_handle, for a given pci_device
+ * and DMA virtual address, iova.
+ *
+ * iova must be the base address of a DMA virtual address range as defined by
+ * the iommu-address-ranges property in the root complex device node defined
+ * by the argument devhandle.
+ */
+#define HV_FAST_PCI_IOTSB_GET_BINDING 0x195
+
+/* pci_iotsb_map()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_MAP
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: index_count
+ * ARG3: iotte_attributes
+ * ARG4: io_page_list_p
+ * RET0: status
+ * RET1: #mapped
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, #iottes,
+ * iotsb_index or iotte_attributes
+ * EBADALIGN Improperly aligned io_page_list_p or I/O page
+ * address in the I/O page list.
+ * ENORADDR Invalid io_page_list_p or I/O page address in
+ * the I/O page list.
+ *
+ * This service creates and flushes mappings in the IOTSB defined by the
+ * arguments devhandle, iotsb.
+ *
+ * The index_count argument consists of two fields. Bits 63:48 contain #iottes
+ * and bits 47:0 contain iotsb_index.
+ *
+ * The first mapping is created in the IOTSB index specified by iotsb_index.
+ * Subsequent mappings are created at iotsb_index+1 and so on.
+ *
+ * The attributes of each mapping are defined by the argument iotte_attributes.
+ *
+ * The io_page_list_p specifies the real address of the 64-bit-aligned list of
+ * #iottes I/O page addresses. Each page address must be a properly aligned
+ * real address of a page to be mapped in the IOTSB. The first entry in the I/O
+ * page list contains the real address of the first page, the 2nd entry for the
+ * 2nd page, and so on.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The return value #mapped is the actual number of mappings created, which may
+ * be less than or equal to the argument #iottes. If the function returns
+ * successfully with a #mapped value less than the requested #iottes then the
+ * caller should continue to invoke the service with updated iotsb_index,
+ * #iottes, and io_page_list_p arguments until all pages are mapped.
+ *
+ * This service must not be used to demap a mapping. In other words, all
+ * mappings must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at the time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP 0x196
+
+/* pci_iotsb_map_one()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_MAP_ONE
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * ARG3: iotte_attributes
+ * ARG4: r_addr
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index
+ * or iotte_attributes
+ * EBADALIGN Improperly aligned r_addr
+ * ENORADDR Invalid r_addr
+ *
+ * This service creates and flushes a single mapping in the IOTSB defined by the
+ * arguments devhandle, iotsb.
+ *
+ * The mapping for the page at r_addr is created at the IOTSB index specified by
+ * iotsb_index with the attributes iotte_attributes.
+ *
+ * This service must not be used to demap a mapping. In other words, the mapping
+ * must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at the time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP_ONE 0x197
+
+/* pci_iotsb_demap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_DEMAP
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * ARG3: #iottes
+ * RET0: status
+ * RET1: #unmapped
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index or #iottes
+ *
+ * This service unmaps and flushes up to #iottes mappings starting at index
+ * iotsb_index from the IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs unmapped is returned in #unmapped and may be less
+ * than or equal to the requested number of IOTTEs, #iottes.
+ *
+ * If #unmapped is less than #iottes, the caller should continue to invoke this
+ * service with updated iotsb_index and #iottes arguments until all pages are
+ * demapped.
+ */
+#define HV_FAST_PCI_IOTSB_DEMAP 0x198
+
+/* pci_iotsb_getmap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_GETMAP
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * RET0: status
+ * RET1: r_addr
+ * RET2: iotte_attributes
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or iotsb_index
+ * ENOMAP No mapping was found
+ *
+ * This service returns the mapping specified by index iotsb_index from the
+ * IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * Upon success, the real address of the mapping shall be returned in
+ * r_addr and the IOTTE mapping attributes shall be returned in
+ * iotte_attributes.
+ *
+ * The return value iotte_attributes may not include optional features used in
+ * the call to create the mapping.
+ */
+#define HV_FAST_PCI_IOTSB_GETMAP 0x199
+
+/* pci_iotsb_sync_mappings()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOTSB_SYNC_MAPPINGS
+ * ARG0: devhandle
+ * ARG1: iotsb_handle
+ * ARG2: iotsb_index
+ * ARG3: #iottes
+ * RET0: status
+ * RET1: #synced
+ * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index, or #iottes
+ *
+ * This service synchronizes #iottes mappings starting at index iotsb_index in
+ * the IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs synchronized is returned in #synced, which may
+ * be less than or equal to the requested number, #iottes.
+ *
+ * Upon a successful return, if #synced is less than #iottes, the caller should
+ * continue to invoke this service with updated iotsb_index and #iottes
+ * arguments until all pages are synchronized.
+ */
+#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS 0x19a
+
/* Logical Domain Channel services. */
#define LDC_CHANNEL_DOWN 0
@@ -2993,6 +3335,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
+#define HV_GRP_ATU 0x0111
#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index cd0d69fa7592..f24f356f2503 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -24,8 +24,36 @@ struct iommu_arena {
unsigned int limit;
};
+#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
+
+/* Data structures for SPARC ATU architecture */
+struct atu_iotsb {
+ void *table; /* IOTSB table base virtual addr */
+ u64 ra; /* IOTSB table real addr */
+ u64 dvma_size; /* ranges[3].size or OS selected 32G size */
+ u64 dvma_base; /* ranges[3].base */
+ u64 table_size; /* IOTSB table size */
+ u64 page_size; /* IO PAGE size for IOTSB */
+ u32 iotsb_num; /* tsbnum is same as iotsb_handle */
+};
+
+struct atu_ranges {
+ u64 base;
+ u64 size;
+};
+
+struct atu {
+ struct atu_ranges *ranges;
+ struct atu_iotsb *iotsb;
+ struct iommu_map_table tbl;
+ u64 base;
+ u64 size;
+ u64 dma_addr_mask;
+};
+
struct iommu {
struct iommu_map_table tbl;
+ struct atu *atu;
spinlock_t lock;
u32 dma_addr_mask;
iopte_t *page_table;
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 662500fa555f..267731234ce8 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_SDIO, },
{ .group = HV_GRP_SDIO_ERR, },
{ .group = HV_GRP_REBOOT_DATA, },
+ { .group = HV_GRP_ATU, .flags = FLAG_PRE_API },
{ .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API },
{ .group = HV_GRP_FIRE_PERF, },
{ .group = HV_GRP_N2_CPU, },
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 5c615abff030..852a3291db96 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask)
struct iommu *iommu = dev->archdata.iommu;
u64 dma_addr_mask = iommu->dma_addr_mask;
- if (device_mask >= (1UL << 32UL))
- return 0;
+ if (device_mask > DMA_BIT_MASK(32)) {
+ if (iommu->atu)
+ dma_addr_mask = iommu->atu->dma_addr_mask;
+ else
+ return 0;
+ }
if ((device_mask & dma_addr_mask) == dma_addr_mask)
return 1;
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
index b40cec252905..828493329f68 100644
--- a/arch/sparc/kernel/iommu_common.h
+++ b/arch/sparc/kernel/iommu_common.h
@@ -13,7 +13,6 @@
#include <linux/scatterlist.h>
#include <linux/device.h>
#include <linux/iommu-helper.h>
-#include <linux/scatterlist.h>
#include <asm/iommu.h>
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index db57d8acdc01..06981cc716b6 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = {
{ .major = 1, .minor = 1 },
};
+static unsigned long vatu_major = 1;
+static unsigned long vatu_minor = 1;
+
#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
struct iommu_batch {
@@ -69,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns
}
/* Interrupts must be disabled. */
-static long iommu_batch_flush(struct iommu_batch *p)
+static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
{
struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+ u64 *pglist = p->pglist;
+ u64 index_count;
unsigned long devhandle = pbm->devhandle;
unsigned long prot = p->prot;
unsigned long entry = p->entry;
- u64 *pglist = p->pglist;
unsigned long npages = p->npages;
+ unsigned long iotsb_num;
+ unsigned long ret;
+ long num;
/* VPCI maj=1, min=[0,1] only supports read and write */
if (vpci_major < 2)
prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
while (npages != 0) {
- long num;
-
- num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
- npages, prot, __pa(pglist));
- if (unlikely(num < 0)) {
- if (printk_ratelimit())
- printk("iommu_batch_flush: IOMMU map of "
- "[%08lx:%08llx:%lx:%lx:%lx] failed with "
- "status %ld\n",
- devhandle, HV_PCI_TSBID(0, entry),
- npages, prot, __pa(pglist), num);
- return -1;
+ if (mask <= DMA_BIT_MASK(32)) {
+ num = pci_sun4v_iommu_map(devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages,
+ prot,
+ __pa(pglist));
+ if (unlikely(num < 0)) {
+ pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
+ __func__,
+ devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist),
+ num);
+ return -1;
+ }
+ } else {
+ index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry),
+ iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
+ ret = pci_sun4v_iotsb_map(devhandle,
+ iotsb_num,
+ index_count,
+ prot,
+ __pa(pglist),
+ &num);
+ if (unlikely(ret != HV_EOK)) {
+ pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
+ __func__,
+ devhandle, iotsb_num,
+ index_count, prot,
+ __pa(pglist), ret);
+ return -1;
+ }
}
-
entry += num;
npages -= num;
pglist += num;
@@ -108,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p)
return 0;
}
-static inline void iommu_batch_new_entry(unsigned long entry)
+static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
if (p->entry + p->npages == entry)
return;
if (p->entry != ~0UL)
- iommu_batch_flush(p);
+ iommu_batch_flush(p, mask);
p->entry = entry;
}
/* Interrupts must be disabled. */
-static inline long iommu_batch_add(u64 phys_page)
+static inline long iommu_batch_add(u64 phys_page, u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
@@ -128,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page)
p->pglist[p->npages++] = phys_page;
if (p->npages == PGLIST_NENTS)
- return iommu_batch_flush(p);
+ return iommu_batch_flush(p, mask);
return 0;
}
/* Interrupts must be disabled. */
-static inline long iommu_batch_end(void)
+static inline long iommu_batch_end(u64 mask)
{
struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
BUG_ON(p->npages >= PGLIST_NENTS);
- return iommu_batch_flush(p);
+ return iommu_batch_flush(p, mask);
}
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp,
unsigned long attrs)
{
+ u64 mask;
unsigned long flags, order, first_page, npages, n;
unsigned long prot = 0;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
struct page *page;
void *ret;
long entry;
@@ -174,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
+ mask = dev->coherent_dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == IOMMU_ERROR_CODE))
goto range_alloc_fail;
- *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+ *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
ret = (void *) first_page;
first_page = __pa(first_page);
@@ -193,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
entry);
for (n = 0; n < npages; n++) {
- long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
+ long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
- if (unlikely(iommu_batch_end() < 0L))
+ if (unlikely(iommu_batch_end(mask) < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
@@ -206,25 +242,71 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
return ret;
iommu_map_fail:
- iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
range_alloc_fail:
free_pages(first_page, order);
return NULL;
}
-static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
- unsigned long npages)
+unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
+ unsigned long iotsb_num,
+ struct pci_bus *bus_dev)
+{
+ struct pci_dev *pdev;
+ unsigned long err;
+ unsigned int bus;
+ unsigned int device;
+ unsigned int fun;
+
+ list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
+ if (pdev->subordinate) {
+ /* No need to bind pci bridge */
+ dma_4v_iotsb_bind(devhandle, iotsb_num,
+ pdev->subordinate);
+ } else {
+ bus = bus_dev->number;
+ device = PCI_SLOT(pdev->devfn);
+ fun = PCI_FUNC(pdev->devfn);
+ err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
+ HV_PCI_DEVICE_BUILD(bus,
+ device,
+ fun));
+
+ /* If bind fails for one device it is going to fail
+ * for the rest of the devices because we are sharing
+ * IOTSB. So in case of failure simply return with
+ * error.
+ */
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
+ dma_addr_t dvma, unsigned long iotsb_num,
+ unsigned long entry, unsigned long npages)
{
- u32 devhandle = *(u32 *)demap_arg;
unsigned long num, flags;
+ unsigned long ret;
local_irq_save(flags);
do {
- num = pci_sun4v_iommu_demap(devhandle,
- HV_PCI_TSBID(0, entry),
- npages);
-
+ if (dvma <= DMA_BIT_MASK(32)) {
+ num = pci_sun4v_iommu_demap(devhandle,
+ HV_PCI_TSBID(0, entry),
+ npages);
+ } else {
+ ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
+ entry, npages, &num);
+ if (unlikely(ret != HV_EOK)) {
+ pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
+ ret);
+ }
+ }
entry += num;
npages -= num;
} while (npages != 0);
@@ -236,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
unsigned long order, npages, entry;
+ unsigned long iotsb_num;
u32 devhandle;
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
- entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+ if (dvma <= DMA_BIT_MASK(32)) {
+ tbl = &iommu->tbl;
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ } else {
+ tbl = &atu->tbl;
+ iotsb_num = atu->iotsb->iotsb_num;
+ }
+ entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+ dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
order = get_order(size);
if (order < 10)
free_pages((unsigned long)cpu, order);
@@ -257,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ u64 mask;
unsigned long flags, npages, oaddr;
unsigned long i, base_paddr;
- u32 bus_addr, ret;
unsigned long prot;
+ dma_addr_t bus_addr, ret;
long entry;
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
if (unlikely(direction == DMA_NONE))
goto bad;
@@ -272,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+ mask = *dev->dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
(unsigned long)(-1), 0);
if (unlikely(entry == IOMMU_ERROR_CODE))
goto bad;
- bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+ bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
base_paddr = __pa(oaddr & IO_PAGE_MASK);
prot = HV_PCI_MAP_ATTR_READ;
@@ -293,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
iommu_batch_start(dev, prot, entry);
for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
- long err = iommu_batch_add(base_paddr);
+ long err = iommu_batch_add(base_paddr, mask);
if (unlikely(err < 0L))
goto iommu_map_fail;
}
- if (unlikely(iommu_batch_end() < 0L))
+ if (unlikely(iommu_batch_end(mask) < 0L))
goto iommu_map_fail;
local_irq_restore(flags);
@@ -310,7 +414,7 @@ bad:
return DMA_ERROR_CODE;
iommu_map_fail:
- iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
return DMA_ERROR_CODE;
}
@@ -320,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
{
struct pci_pbm_info *pbm;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
unsigned long npages;
+ unsigned long iotsb_num;
long entry;
u32 devhandle;
@@ -332,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
npages >>= IO_PAGE_SHIFT;
bus_addr &= IO_PAGE_MASK;
- entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+
+ if (bus_addr <= DMA_BIT_MASK(32)) {
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ tbl = &iommu->tbl;
+ } else {
+ iotsb_num = atu->iotsb->iotsb_num;
+ tbl = &atu->tbl;
+ }
+ entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
+ dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
}
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -353,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
unsigned long seg_boundary_size;
int outcount, incount, i;
struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ u64 mask;
unsigned long base_shift;
long err;
BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
if (nelems == 0 || !iommu)
return 0;
@@ -384,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
max_seg_size = dma_get_max_seg_size(dev);
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
- base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
+
+ mask = *dev->dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;
+
for_each_sg(sglist, s, nelems, i) {
unsigned long paddr, npages, entry, out_entry = 0, slen;
@@ -397,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
/* Allocate iommu entries for that segment */
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
- entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+ entry = iommu_tbl_range_alloc(dev, tbl, npages,
&handle, (unsigned long)(-1), 0);
/* Handle failure */
if (unlikely(entry == IOMMU_ERROR_CODE)) {
- if (printk_ratelimit())
- printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
- " npages %lx\n", iommu, paddr, npages);
+ pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
+ tbl, paddr, npages);
goto iommu_map_failed;
}
- iommu_batch_new_entry(entry);
+ iommu_batch_new_entry(entry, mask);
/* Convert entry to a dma_addr_t */
- dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
+ dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
dma_addr |= (s->offset & ~IO_PAGE_MASK);
/* Insert into HW table */
paddr &= IO_PAGE_MASK;
while (npages--) {
- err = iommu_batch_add(paddr);
+ err = iommu_batch_add(paddr, mask);
if (unlikely(err < 0L))
goto iommu_map_failed;
paddr += IO_PAGE_SIZE;
@@ -452,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
dma_next = dma_addr + slen;
}
- err = iommu_batch_end();
+ err = iommu_batch_end(mask);
if (unlikely(err < 0L))
goto iommu_map_failed;
@@ -475,7 +603,7 @@ iommu_map_failed:
vaddr = s->dma_address & IO_PAGE_MASK;
npages = iommu_num_pages(s->dma_address, s->dma_length,
IO_PAGE_SIZE);
- iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+ iommu_tbl_range_free(tbl, vaddr, npages,
IOMMU_ERROR_CODE);
/* XXX demap? XXX */
s->dma_address = DMA_ERROR_CODE;
@@ -496,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
struct pci_pbm_info *pbm;
struct scatterlist *sg;
struct iommu *iommu;
+ struct atu *atu;
unsigned long flags, entry;
+ unsigned long iotsb_num;
u32 devhandle;
BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
devhandle = pbm->devhandle;
local_irq_save(flags);
@@ -512,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
dma_addr_t dma_handle = sg->dma_address;
unsigned int len = sg->dma_length;
unsigned long npages;
- struct iommu_map_table *tbl = &iommu->tbl;
+ struct iommu_map_table *tbl;
unsigned long shift = IO_PAGE_SHIFT;
if (!len)
break;
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+
+ if (dma_handle <= DMA_BIT_MASK(32)) {
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ tbl = &iommu->tbl;
+ } else {
+ iotsb_num = atu->iotsb->iotsb_num;
+ tbl = &atu->tbl;
+ }
entry = ((dma_handle - tbl->table_map_base) >> shift);
- dma_4v_iommu_demap(&devhandle, entry, npages);
- iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+ dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
+ entry, npages);
+ iommu_tbl_range_free(tbl, dma_handle, npages,
IOMMU_ERROR_CODE);
sg = sg_next(sg);
}
@@ -581,6 +721,132 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
return cnt;
}
+static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
+{
+ struct atu *atu = pbm->iommu->atu;
+ struct atu_iotsb *iotsb;
+ void *table;
+ u64 table_size;
+ u64 iotsb_num;
+ unsigned long order;
+ unsigned long err;
+
+ iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
+ if (!iotsb) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ atu->iotsb = iotsb;
+
+ /* calculate size of IOTSB */
+ table_size = (atu->size / IO_PAGE_SIZE) * 8;
+ order = get_order(table_size);
+ table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!table) {
+ err = -ENOMEM;
+ goto table_failed;
+ }
+ iotsb->table = table;
+ iotsb->ra = __pa(table);
+ iotsb->dvma_size = atu->size;
+ iotsb->dvma_base = atu->base;
+ iotsb->table_size = table_size;
+ iotsb->page_size = IO_PAGE_SIZE;
+
+ /* configure and register IOTSB with HV */
+ err = pci_sun4v_iotsb_conf(pbm->devhandle,
+ iotsb->ra,
+ iotsb->table_size,
+ iotsb->page_size,
+ iotsb->dvma_base,
+ &iotsb_num);
+ if (err) {
+ pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
+ goto iotsb_conf_failed;
+ }
+ iotsb->iotsb_num = iotsb_num;
+
+ err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
+ if (err) {
+ pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
+ goto iotsb_conf_failed;
+ }
+
+ return 0;
+
+iotsb_conf_failed:
+ free_pages((unsigned long)table, order);
+table_failed:
+ kfree(iotsb);
+out_err:
+ return err;
+}
+
+static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
+{
+ struct atu *atu = pbm->iommu->atu;
+ unsigned long err;
+ const u64 *ranges;
+ u64 map_size, num_iotte;
+ u64 dma_mask;
+ const u32 *page_size;
+ int len;
+
+ ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
+ &len);
+ if (!ranges) {
+ pr_err(PFX "No iommu-address-ranges\n");
+ return -EINVAL;
+ }
+
+ page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
+ NULL);
+ if (!page_size) {
+ pr_err(PFX "No iommu-pagesizes\n");
+ return -EINVAL;
+ }
+
+ /* There are 4 iommu-address-ranges supported. Each range is a pair of
+ * {base, size}. ranges[0] and ranges[1] are 32bit address space
+ * while ranges[2] and ranges[3] are 64bit space. We want to use the 64bit
+ * address ranges to support 64bit addressing. Because 'size' for
+ * ranges[2] and ranges[3] is the same we can select either of
+ * ranges[2] or ranges[3] for mapping. However, because 'size' is too
+ * large for the OS to allocate an IOTSB, we use a fixed size of 32G
+ * (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices
+ * to share.
+ */
+ atu->ranges = (struct atu_ranges *)ranges;
+ atu->base = atu->ranges[3].base;
+ atu->size = ATU_64_SPACE_SIZE;
+
+ /* Create IOTSB */
+ err = pci_sun4v_atu_alloc_iotsb(pbm);
+ if (err) {
+ pr_err(PFX "Error creating ATU IOTSB\n");
+ return err;
+ }
+
+ /* Create ATU iommu map.
+ * One bit represents one iotte in IOTSB table.
+ */
+ dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
+ num_iotte = atu->size / IO_PAGE_SIZE;
+ map_size = num_iotte / 8;
+ atu->tbl.table_map_base = atu->base;
+ atu->dma_addr_mask = dma_mask;
+ atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
+ if (!atu->tbl.map)
+ return -ENOMEM;
+
+ iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
+ NULL, false /* no large_pool */,
+ 0 /* default npools */,
+ false /* want span boundary checking */);
+
+ return 0;
+}
+
static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
@@ -918,6 +1184,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
pci_sun4v_scan_bus(pbm, &op->dev);
+ /* If atu_init fails it's not a complete failure;
+ * we can still continue using the legacy iommu.
+ */
+ if (pbm->iommu->atu) {
+ err = pci_sun4v_atu_init(pbm);
+ if (err) {
+ kfree(pbm->iommu->atu);
+ pbm->iommu->atu = NULL;
+ pr_err(PFX "ATU init failed, err=%d\n", err);
+ }
+ }
+
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
@@ -931,8 +1209,10 @@ static int pci_sun4v_probe(struct platform_device *op)
struct pci_pbm_info *pbm;
struct device_node *dp;
struct iommu *iommu;
+ struct atu *atu;
u32 devhandle;
int i, err = -ENODEV;
+ static bool hv_atu = true;
dp = op->dev.of_node;
@@ -954,6 +1234,19 @@ static int pci_sun4v_probe(struct platform_device *op)
pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
vpci_major, vpci_minor);
+ err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
+ if (err) {
+ /* don't return an error if we fail to register the
+ * ATU group, but ATU hcalls won't be available.
+ */
+ hv_atu = false;
+ pr_err(PFX "Could not register hvapi ATU err=%d\n",
+ err);
+ } else {
+ pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
+ vatu_major, vatu_minor);
+ }
+
dma_ops = &sun4v_dma_ops;
}
@@ -991,6 +1284,14 @@ static int pci_sun4v_probe(struct platform_device *op)
}
pbm->iommu = iommu;
+ iommu->atu = NULL;
+ if (hv_atu) {
+ atu = kzalloc(sizeof(*atu), GFP_KERNEL);
+ if (!atu)
+ pr_err(PFX "Could not allocate atu\n");
+ else
+ iommu->atu = atu;
+ }
err = pci_sun4v_pbm_init(pbm, op, devhandle);
if (err)
@@ -1001,6 +1302,7 @@ static int pci_sun4v_probe(struct platform_device *op)
return 0;
out_free_iommu:
+ kfree(iommu->atu);
kfree(pbm->iommu);
out_free_controller:
diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h
index 5642212390b2..22603a4e48bf 100644
--- a/arch/sparc/kernel/pci_sun4v.h
+++ b/arch/sparc/kernel/pci_sun4v.h
@@ -89,4 +89,25 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
unsigned long msinum,
unsigned long valid);
+/* Sun4v HV IOMMU v2 APIs */
+unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
+ unsigned long ra,
+ unsigned long table_size,
+ unsigned long page_size,
+ unsigned long dvma_base,
+ u64 *iotsb_num);
+unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned int pci_device);
+unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned long iotsb_index_iottes,
+ unsigned long io_attributes,
+ unsigned long io_page_list_pa,
+ long *mapped);
+unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
+ unsigned long iotsb_num,
+ unsigned long iotsb_index,
+ unsigned long iottes,
+ unsigned long *demapped);
#endif /* !(_PCI_SUN4V_H) */
diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S
index e606d46c6815..578f09657916 100644
--- a/arch/sparc/kernel/pci_sun4v_asm.S
+++ b/arch/sparc/kernel/pci_sun4v_asm.S
@@ -360,3 +360,71 @@ ENTRY(pci_sun4v_msg_setvalid)
mov %o0, %o0
ENDPROC(pci_sun4v_msg_setvalid)
+ /*
+ * %o0: devhandle
+ * %o1: r_addr
+ * %o2: size
+ * %o3: pagesize
+ * %o4: virt
+ * %o5: &iotsb_num/&iotsb_handle
+ *
+ * returns %o0: status
+ * %o1: iotsb_num/iotsb_handle
+ */
+ENTRY(pci_sun4v_iotsb_conf)
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOTSB_CONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_conf)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: pci_device
+ *
+ * returns %o0: status
+ */
+ENTRY(pci_sun4v_iotsb_bind)
+ mov HV_FAST_PCI_IOTSB_BIND, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(pci_sun4v_iotsb_bind)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: index_count
+ * %o3: iotte_attributes
+ * %o4: io_page_list_p
+ * %o5: &mapped
+ *
+ * returns %o0: status
+ * %o1: #mapped
+ */
+ENTRY(pci_sun4v_iotsb_map)
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOTSB_MAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_map)
+
+ /*
+ * %o0: devhandle
+ * %o1: iotsb_num/iotsb_handle
+ * %o2: iotsb_index
+ * %o3: #iottes
+ * %o4: &demapped
+ *
+ * returns %o0: status
+ * %o1: #demapped
+ */
+ENTRY(pci_sun4v_iotsb_demap)
+ mov HV_FAST_PCI_IOTSB_DEMAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ stx %o1, [%o4]
+ENDPROC(pci_sun4v_iotsb_demap)
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index c3c12efe0bc0..9c0c8fd0b292 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
/* 1. Make sure we are not getting garbage from the user */
- if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ if (invalid_frame_pointer(sf, sizeof(*sf)))
goto segv_and_exit;
if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
@@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
synchronize_user_stack();
sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
- if (!invalid_frame_pointer(sf, sizeof(*sf)))
+ if (invalid_frame_pointer(sf, sizeof(*sf)))
goto segv;
if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 439784b7b7ac..37aa537b3ad8 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -802,8 +802,10 @@ struct mdesc_mblock {
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;
+static int find_numa_node_for_addr(unsigned long pa,
+ struct node_mem_mask *pnode_mask);
-static unsigned long ra_to_pa(unsigned long addr)
+static unsigned long __init ra_to_pa(unsigned long addr)
{
int i;
@@ -819,8 +821,11 @@ static unsigned long ra_to_pa(unsigned long addr)
return addr;
}
-static int find_node(unsigned long addr)
+static int __init find_node(unsigned long addr)
{
+ static bool search_mdesc = true;
+ static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
+ static int last_index;
int i;
addr = ra_to_pa(addr);
@@ -830,13 +835,30 @@ static int find_node(unsigned long addr)
if ((addr & p->mask) == p->val)
return i;
}
- /* The following condition has been observed on LDOM guests.*/
- WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
- " rule. Some physical memory will be owned by node 0.");
- return 0;
+ /* The following condition has been observed on LDOM guests because
+ * node_masks only contains the best latency mask and value.
+ * An LDOM guest's mdesc can contain a single latency group that
+ * covers multiple address ranges. Print a warning message only if the
+ * address can be found in neither node_masks nor the mdesc.
+ */
+ if ((search_mdesc) &&
+ ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
+ /* find the available node in the mdesc */
+ last_index = find_numa_node_for_addr(addr, &last_mem_mask);
+ numadbg("find_node: latency group for address 0x%lx is %d\n",
+ addr, last_index);
+ if ((last_index < 0) || (last_index >= num_node_masks)) {
+ /* WARN_ONCE() and use default group 0 */
+ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
+ search_mdesc = false;
+ last_index = 0;
+ }
+ }
+
+ return last_index;
}
-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
*nid = find_node(start);
start += PAGE_SIZE;
@@ -1160,6 +1182,41 @@ int __node_distance(int from, int to)
return numa_latency[from][to];
}
+static int find_numa_node_for_addr(unsigned long pa,
+ struct node_mem_mask *pnode_mask)
+{
+ struct mdesc_handle *md = mdesc_grab();
+ u64 node, arc;
+ int i = 0;
+
+ node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+ if (node == MDESC_NODE_NULL)
+ goto out;
+
+ mdesc_for_each_node_by_name(md, node, "group") {
+ mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
+ u64 target = mdesc_arc_target(md, arc);
+ struct mdesc_mlgroup *m = find_mlgroup(target);
+
+ if (!m)
+ continue;
+ if ((pa & m->mask) == m->match) {
+ if (pnode_mask) {
+ pnode_mask->mask = m->mask;
+ pnode_mask->val = m->match;
+ }
+ mdesc_release(md);
+ return i;
+ }
+ }
+ i++;
+ }
+
+out:
+ mdesc_release(md);
+ return -1;
+}
+
static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{
int i;
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 25810b144b58..4da03030d5a7 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
}
+static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ if (!level)
+ return -1;
+
+ return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
+}
+
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
@@ -163,18 +173,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq irq;
int r;
- if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
- return -EWOULDBLOCK;
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_HV_SINT:
+ return kvm_hv_set_sint(e, kvm, irq_source_id, level,
+ line_status);
- if (kvm_msi_route_invalid(kvm, e))
- return -EINVAL;
+ case KVM_IRQ_ROUTING_MSI:
+ if (kvm_msi_route_invalid(kvm, e))
+ return -EINVAL;
- kvm_set_msi_irq(kvm, e, &irq);
+ kvm_set_msi_irq(kvm, e, &irq);
- if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
- return r;
- else
- return -EWOULDBLOCK;
+ if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+ return r;
+ break;
+
+ default:
+ break;
+ }
+
+ return -EWOULDBLOCK;
}
int kvm_request_irq_source_id(struct kvm *kvm)
@@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
srcu_read_unlock(&kvm->irq_srcu, idx);
}
-static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int irq_source_id, int level,
- bool line_status)
-{
- if (!level)
- return -1;
-
- return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
-}
-
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
@@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
srcu_read_unlock(&kvm->irq_srcu, idx);
}
-int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
- int irq_source_id, int level, bool line_status)
-{
- switch (irq->type) {
- case KVM_IRQ_ROUTING_HV_SINT:
- return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
- line_status);
- default:
- return -EWOULDBLOCK;
- }
-}
-
void kvm_arch_irq_routing_update(struct kvm *kvm)
{
kvm_hv_irq_routing_update(kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3017de0431bd..04c5d96b1d67 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
struct kvm_shared_msrs *locals
= container_of(urn, struct kvm_shared_msrs, urn);
struct kvm_shared_msr_values *values;
+ unsigned long flags;
+ /*
+ * Disabling irqs at this point since the following code could be
+ * interrupted and executed through kvm_arch_hardware_disable()
+ */
+ local_irq_save(flags);
+ if (locals->registered) {
+ locals->registered = false;
+ user_return_notifier_unregister(urn);
+ }
+ local_irq_restore(flags);
for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
values = &locals->values[slot];
if (values->host != values->curr) {
@@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
values->curr = values->host;
}
}
- locals->registered = false;
- user_return_notifier_unregister(urn);
}
static void shared_msr_update(unsigned slot, u32 msr)
@@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
static u64 __get_kvmclock_ns(struct kvm *kvm)
{
- struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
struct kvm_arch *ka = &kvm->arch;
- s64 ns;
+ struct pvclock_vcpu_time_info hv_clock;
- if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
- u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
- } else {
- ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+ spin_lock(&ka->pvclock_gtod_sync_lock);
+ if (!ka->use_master_clock) {
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+ return ktime_get_boot_ns() + ka->kvmclock_offset;
}
- return ns;
+ hv_clock.tsc_timestamp = ka->master_cycle_now;
+ hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ &hv_clock.tsc_shift,
+ &hv_clock.tsc_to_system_mul);
+ return __pvclock_read_cycles(&hv_clock, rdtsc());
}
u64 get_kvmclock_ns(struct kvm *kvm)
@@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_XEN_HVM:
- case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_HYPERV:
case KVM_CAP_HYPERV_VAPIC:
@@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
r = 1;
break;
+ case KVM_CAP_ADJUST_CLOCK:
+ r = KVM_CLOCK_TSC_STABLE;
+ break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
@@ -3415,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
};
case KVM_SET_VAPIC_ADDR: {
struct kvm_vapic_addr va;
+ int idx;
r = -EINVAL;
if (!lapic_in_kernel(vcpu))
@@ -3422,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&va, argp, sizeof va))
goto out;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_X86_SETUP_MCE: {
@@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
struct kvm_clock_data user_ns;
u64 now_ns;
- now_ns = get_kvmclock_ns(kvm);
+ local_irq_disable();
+ now_ns = __get_kvmclock_ns(kvm);
user_ns.clock = now_ns;
- user_ns.flags = 0;
+ user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
+ local_irq_enable();
memset(&user_ns.pad, 0, sizeof(user_ns.pad));
r = -EFAULT;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index ac58c1616408..555b9fa0ad43 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -16,6 +16,7 @@ KCOV_INSTRUMENT := n
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
KBUILD_CFLAGS += -m$(BITS)
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index de9b14b2d348..cd400af4a6b2 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -767,7 +767,14 @@ __SYSCALL(346, sys_preadv2, 6)
#define __NR_pwritev2 347
__SYSCALL(347, sys_pwritev2, 6)
-#define __NR_syscall_count 348
+#define __NR_pkey_mprotect 348
+__SYSCALL(348, sys_pkey_mprotect, 4)
+#define __NR_pkey_alloc 349
+__SYSCALL(349, sys_pkey_alloc, 2)
+#define __NR_pkey_free 350
+__SYSCALL(350, sys_pkey_free, 1)
+
+#define __NR_syscall_count 351
/*
* sysxtensa syscall handler
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 9a5bcd0381a7..be81e69b25bc 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -172,10 +172,11 @@ void __init time_init(void)
{
of_clk_init(NULL);
#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
- printk("Calibrating CPU frequency ");
+ pr_info("Calibrating CPU frequency ");
calibrate_ccount();
- printk("%d.%02d MHz\n", (int)ccount_freq/1000000,
- (int)(ccount_freq/10000)%100);
+ pr_cont("%d.%02d MHz\n",
+ (int)ccount_freq / 1000000,
+ (int)(ccount_freq / 10000) % 100);
#else
ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL;
#endif
@@ -210,9 +211,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
void calibrate_delay(void)
{
loops_per_jiffy = ccount_freq / HZ;
- printk("Calibrating delay loop (skipped)... "
- "%lu.%02lu BogoMIPS preset\n",
- loops_per_jiffy/(1000000/HZ),
- (loops_per_jiffy/(10000/HZ)) % 100);
+ pr_info("Calibrating delay loop (skipped)... %lu.%02lu BogoMIPS preset\n",
+ loops_per_jiffy / (1000000 / HZ),
+ (loops_per_jiffy / (10000 / HZ)) % 100);
}
#endif
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index d02fc304b31c..ce37d5b899fe 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -465,26 +465,25 @@ void show_regs(struct pt_regs * regs)
for (i = 0; i < 16; i++) {
if ((i % 8) == 0)
- printk(KERN_INFO "a%02d:", i);
- printk(KERN_CONT " %08lx", regs->areg[i]);
+ pr_info("a%02d:", i);
+ pr_cont(" %08lx", regs->areg[i]);
}
- printk(KERN_CONT "\n");
-
- printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n",
- regs->pc, regs->ps, regs->depc, regs->excvaddr);
- printk("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n",
- regs->lbeg, regs->lend, regs->lcount, regs->sar);
+ pr_cont("\n");
+ pr_info("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n",
+ regs->pc, regs->ps, regs->depc, regs->excvaddr);
+ pr_info("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n",
+ regs->lbeg, regs->lend, regs->lcount, regs->sar);
if (user_mode(regs))
- printk("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n",
- regs->windowbase, regs->windowstart, regs->wmask,
- regs->syscall);
+ pr_cont("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n",
+ regs->windowbase, regs->windowstart, regs->wmask,
+ regs->syscall);
}
static int show_trace_cb(struct stackframe *frame, void *data)
{
if (kernel_text_address(frame->pc)) {
- printk(" [<%08lx>] ", frame->pc);
- print_symbol("%s\n", frame->pc);
+ pr_cont(" [<%08lx>]", frame->pc);
+ print_symbol(" %s\n", frame->pc);
}
return 0;
}
@@ -494,19 +493,13 @@ void show_trace(struct task_struct *task, unsigned long *sp)
if (!sp)
sp = stack_pointer(task);
- printk("Call Trace:");
-#ifdef CONFIG_KALLSYMS
- printk("\n");
-#endif
+ pr_info("Call Trace:\n");
walk_stackframe(sp, show_trace_cb, NULL);
- printk("\n");
+#ifndef CONFIG_KALLSYMS
+ pr_cont("\n");
+#endif
}
-/*
- * This routine abuses get_user()/put_user() to reference pointers
- * with at least a bit of error checking ...
- */
-
static int kstack_depth_to_print = 24;
void show_stack(struct task_struct *task, unsigned long *sp)
@@ -518,52 +511,29 @@ void show_stack(struct task_struct *task, unsigned long *sp)
sp = stack_pointer(task);
stack = sp;
- printk("\nStack: ");
+ pr_info("Stack:\n");
for (i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(sp))
break;
- if (i && ((i % 8) == 0))
- printk("\n ");
- printk("%08lx ", *sp++);
+ pr_cont(" %08lx", *sp++);
+ if (i % 8 == 7)
+ pr_cont("\n");
}
- printk("\n");
show_trace(task, stack);
}
-void show_code(unsigned int *pc)
-{
- long i;
-
- printk("\nCode:");
-
- for(i = -3 ; i < 6 ; i++) {
- unsigned long insn;
- if (__get_user(insn, pc + i)) {
- printk(" (Bad address in pc)\n");
- break;
- }
- printk("%c%08lx%c",(i?' ':'<'),insn,(i?' ':'>'));
- }
-}
-
DEFINE_SPINLOCK(die_lock);
void die(const char * str, struct pt_regs * regs, long err)
{
static int die_counter;
- int nl = 0;
console_verbose();
spin_lock_irq(&die_lock);
- printk("%s: sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
- nl = 1;
-#endif
- if (nl)
- printk("\n");
+ pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter,
+ IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "");
show_regs(regs);
if (!user_mode(regs))
show_stack(NULL, (unsigned long*)regs->areg[1]);