From fa3959f457109cc7d082b86ea6daae927982815b Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 24 Apr 2008 01:27:02 +0200 Subject: mv643xx_eth: get rid of static variables, allow multiple instances Move mv643xx_eth's static state (ethernet register block base address and MII management interface spinlock) into a struct hanging off the shared platform device. This is necessary to support chips that contain multiple mv643xx_eth silicon blocks. Signed-off-by: Lennert Buytenhek Acked-by: Nicolas Pitre Signed-off-by: Dale Farnsworth --- include/linux/mv643xx_eth.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 30e11aa3c1c9..2d59855b61c1 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -1,6 +1,7 @@ /* * MV-643XX ethernet platform device data definition file. */ + #ifndef __LINUX_MV643XX_ETH_H #define __LINUX_MV643XX_ETH_H @@ -13,7 +14,9 @@ #define MV643XX_ETH_BASE_ADDR_ENABLE_REG 0x2290 struct mv643xx_eth_platform_data { + struct platform_device *shared; int port_number; + u16 force_phy_addr; /* force override if phy_addr == 0 */ u16 phy_addr; -- cgit v1.2.3 From f2ce825d2a89b30af14fa577298fecaab7bc9504 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 24 Apr 2008 01:27:17 +0200 Subject: mv643xx_eth: mbus decode window support Make it possible to pass mbus_dram_target_info to the mv643xx_eth driver via the platform data, and make the mv643xx_eth driver program the window registers based on this data if it is passed in. Signed-off-by: Lennert Buytenhek Reviewed-by: Tzachi Perelstein Acked-by: Russell King Signed-off-by: Dale Farnsworth --- drivers/net/mv643xx_eth.c | 51 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/mv643xx_eth.h | 6 ++++++ 2 files changed, 57 insertions(+) (limited to 'include') diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index eebf0d288e36..aabf1c60946d 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -91,6 +91,11 @@ */ #define PHY_ADDR_REG 0x0000 #define SMI_REG 0x0004 +#define WINDOW_BASE(i) (0x0200 + ((i) << 3)) +#define WINDOW_SIZE(i) (0x0204 + ((i) << 3)) +#define WINDOW_REMAP_HIGH(i) (0x0280 + ((i) << 2)) +#define WINDOW_BAR_ENABLE 0x0290 +#define WINDOW_PROTECT(i) (0x0294 + ((i) << 4)) /* * Per-port registers. @@ -512,6 +517,8 @@ struct mv643xx_shared_private { /* used to protect SMI_REG, which is shared across ports */ spinlock_t phy_lock; + + u32 win_protect; }; struct mv643xx_private { @@ -1888,6 +1895,9 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mp->shared = platform_get_drvdata(pd->shared); port_num = mp->port_num = pd->port_number; + if (mp->shared->win_protect) + wrl(mp, WINDOW_PROTECT(port_num), mp->shared->win_protect); + /* set default config values */ eth_port_uc_addr_get(mp, dev->dev_addr); mp->rx_ring_size = PORT_DEFAULT_RECEIVE_QUEUE_SIZE; @@ -1992,9 +2002,44 @@ static int mv643xx_eth_remove(struct platform_device *pdev) return 0; } +static void mv643xx_eth_conf_mbus_windows(struct mv643xx_shared_private *msp, + struct mbus_dram_target_info *dram) +{ + void __iomem *base = msp->eth_base; + u32 win_enable; + u32 win_protect; + int i; + + for (i = 0; i < 6; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + + win_enable = 0x3f; + win_protect = 0; + + for (i = 0; i < dram->num_cs; i++) { + struct mbus_dram_window *cs = dram->cs + i; + + writel((cs->base & 0xffff0000) | + (cs->mbus_attr << 8) | + dram->mbus_dram_target_id, base + WINDOW_BASE(i)); + writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i)); + + win_enable &= ~(1 << i); + win_protect |= 3 << (2 * i); + } + + writel(win_enable, base + WINDOW_BAR_ENABLE); + msp->win_protect = win_protect; +} + static int mv643xx_eth_shared_probe(struct platform_device *pdev) { static int mv643xx_version_printed = 0; + struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data; struct mv643xx_shared_private *msp; struct resource *res; int ret; @@ -2021,6 +2066,12 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) platform_set_drvdata(pdev, msp); + /* + * (Re-)program MBUS remapping windows if we are asked to. + */ + if (pd != NULL && pd->dram != NULL) + mv643xx_eth_conf_mbus_windows(msp, pd->dram); + return 0; out_free: diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 2d59855b61c1..4801b02b444e 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -5,6 +5,8 @@ #ifndef __LINUX_MV643XX_ETH_H #define __LINUX_MV643XX_ETH_H +#include + #define MV643XX_ETH_SHARED_NAME "mv643xx_eth_shared" #define MV643XX_ETH_NAME "mv643xx_eth" #define MV643XX_ETH_SHARED_REGS 0x2000 @@ -13,6 +15,10 @@ #define MV643XX_ETH_SIZE_REG_4 0x2224 #define MV643XX_ETH_BASE_ADDR_ENABLE_REG 0x2290 +struct mv643xx_eth_shared_platform_data { + struct mbus_dram_target_info *dram; +}; + struct mv643xx_eth_platform_data { struct platform_device *shared; int port_number; -- cgit v1.2.3 From c416a41f99be190e1f558cb06f70ddd560ce8b4b Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 24 Apr 2008 01:27:32 +0200 Subject: mv643xx_eth: configurable t_clk Make t_clk configurable via platform device data (with the current hardcoded value, 133 MHz, being the default), as it varies across different chip families. Signed-off-by: Lennert Buytenhek Acked-by: Nicolas Pitre Signed-off-by: Dale Farnsworth --- drivers/net/mv643xx_eth.c | 17 +++++++++-------- include/linux/mv643xx_eth.h | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index aabf1c60946d..8bd41e4e88a9 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -519,6 +519,8 @@ struct mv643xx_shared_private { spinlock_t phy_lock; u32 win_protect; + + unsigned int t_clk; }; struct mv643xx_private { @@ -1129,7 +1131,6 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id) * * INPUT: * struct mv643xx_private *mp Ethernet port - * unsigned int t_clk t_clk of the MV-643xx chip in HZ units * unsigned int delay Delay in usec * * OUTPUT: @@ -1140,10 +1141,10 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id) * */ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp, - unsigned int t_clk, unsigned int delay) + unsigned int delay) { unsigned int port_num = mp->port_num; - unsigned int coal = ((t_clk / 1000000) * delay) / 64; + unsigned int coal = ((mp->shared->t_clk / 1000000) * delay) / 64; /* Set RX Coalescing mechanism */ wrl(mp, SDMA_CONFIG_REG(port_num), @@ -1168,7 +1169,6 @@ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp, * * INPUT: * struct mv643xx_private *mp Ethernet port - * unsigned int t_clk t_clk of the MV-643xx chip in HZ units * unsigned int delay Delay in uSeconds * * OUTPUT: @@ -1179,9 +1179,9 @@ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp, * */ static unsigned int eth_port_set_tx_coal(struct mv643xx_private *mp, - unsigned int t_clk, unsigned int delay) + unsigned int delay) { - unsigned int coal = ((t_clk / 1000000) * delay) / 64; + unsigned int coal = ((mp->shared->t_clk / 1000000) * delay) / 64; /* Set TX Coalescing mechanism */ wrl(mp, TX_FIFO_URGENT_THRESHOLD_REG(mp->port_num), coal << 4); @@ -1423,11 +1423,11 @@ static int mv643xx_eth_open(struct net_device *dev) #ifdef MV643XX_COAL mp->rx_int_coal = - eth_port_set_rx_coal(mp, 133000000, MV643XX_RX_COAL); + eth_port_set_rx_coal(mp, MV643XX_RX_COAL); #endif mp->tx_int_coal = - eth_port_set_tx_coal(mp, 133000000, MV643XX_TX_COAL); + eth_port_set_tx_coal(mp, MV643XX_TX_COAL); /* Unmask phy and link status changes interrupts */ wrl(mp, INTERRUPT_EXTEND_MASK_REG(port_num), ETH_INT_UNMASK_ALL_EXT); @@ -2063,6 +2063,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) goto out_free; spin_lock_init(&msp->phy_lock); + msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000; platform_set_drvdata(pdev, msp); diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 4801b02b444e..9f3a6032ff2e 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -17,6 +17,7 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; + unsigned int t_clk; }; struct mv643xx_eth_platform_data { -- cgit v1.2.3 From 240e4419e0cfcba737883b637ec2bdcc071ea03d Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 24 Apr 2008 01:27:44 +0200 Subject: mv643xx_eth: shorten shared platform driver name Change the MV643XX_ETH_SHARED_NAME platform driver name to something shorter than 19 characters, so that we can register multiple (otherwise we end up with sysfs conflicts since all instances will map to "mv643xx_eth_shared." as there is a 20-char sysfs file name limit.) Signed-off-by: Lennert Buytenhek Acked-by: Nicolas Pitre Signed-off-by: Dale Farnsworth --- include/linux/mv643xx_eth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 9f3a6032ff2e..66dc9571922a 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -7,8 +7,8 @@ #include -#define MV643XX_ETH_SHARED_NAME "mv643xx_eth_shared" -#define MV643XX_ETH_NAME "mv643xx_eth" +#define MV643XX_ETH_SHARED_NAME "mv643xx_eth" +#define MV643XX_ETH_NAME "mv643xx_eth_port" #define MV643XX_ETH_SHARED_REGS 0x2000 #define MV643XX_ETH_SHARED_REGS_SIZE 0x2000 #define MV643XX_ETH_BAR_4 0x2220 -- cgit v1.2.3 From ce4e2e4558903ef92edf1ab4e09b0b338a09fd61 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 24 Apr 2008 01:29:59 +0200 Subject: mv643xx_eth: inter-mv643xx SMI port sharing There exist chips with up to four mv643xx_eth silicon blocks but only one external SMI (MII management) interface -- the SMI logic of the first block is shared by all the blocks. Handle this by allowing a per-port override of which mv643xx_eth_shared's SMI registers (and spinlock) to use. Signed-off-by: Lennert Buytenhek Acked-by: Nicolas Pitre Signed-off-by: Dale Farnsworth --- drivers/net/mv643xx_eth.c | 38 ++++++++++++++++++++++---------------- include/linux/mv643xx_eth.h | 2 ++ 2 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 8bd41e4e88a9..b7915cdcc6a5 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -527,6 +527,8 @@ struct mv643xx_private { struct mv643xx_shared_private *shared; int port_num; /* User Ethernet port number */ + struct mv643xx_shared_private *shared_smi; + u32 rx_sram_addr; /* Base address of rx sram area */ u32 rx_sram_size; /* Size of rx sram area */ u32 tx_sram_addr; /* Base address of tx sram area */ @@ -1898,6 +1900,10 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (mp->shared->win_protect) wrl(mp, WINDOW_PROTECT(port_num), mp->shared->win_protect); + mp->shared_smi = mp->shared; + if (pd->shared_smi != NULL) + mp->shared_smi = platform_get_drvdata(pd->shared_smi); + /* set default config values */ eth_port_uc_addr_get(mp, dev->dev_addr); mp->rx_ring_size = PORT_DEFAULT_RECEIVE_QUEUE_SIZE; @@ -2986,15 +2992,16 @@ static void eth_port_reset(struct mv643xx_private *mp) static void eth_port_read_smi_reg(struct mv643xx_private *mp, unsigned int phy_reg, unsigned int *value) { + void __iomem *smi_reg = mp->shared_smi->eth_base + SMI_REG; int phy_addr = ethernet_phy_get(mp); unsigned long flags; int i; /* the SMI register is a shared resource */ - spin_lock_irqsave(&mp->shared->phy_lock, flags); + spin_lock_irqsave(&mp->shared_smi->phy_lock, flags); /* wait for the SMI register to become available */ - for (i = 0; rdl(mp, SMI_REG) & ETH_SMI_BUSY; i++) { + for (i = 0; readl(smi_reg) & ETH_SMI_BUSY; i++) { if (i == PHY_WAIT_ITERATIONS) { printk("%s: PHY busy timeout\n", mp->dev->name); goto out; @@ -3002,11 +3009,11 @@ static void eth_port_read_smi_reg(struct mv643xx_private *mp, udelay(PHY_WAIT_MICRO_SECONDS); } - wrl(mp, SMI_REG, - (phy_addr << 16) | (phy_reg << 21) | ETH_SMI_OPCODE_READ); + writel((phy_addr << 16) | (phy_reg << 21) | ETH_SMI_OPCODE_READ, + smi_reg); /* now wait for the data to be valid */ - for (i = 0; !(rdl(mp, SMI_REG) & ETH_SMI_READ_VALID); i++) { + for (i = 0; !(readl(smi_reg) & ETH_SMI_READ_VALID); i++) { if (i == PHY_WAIT_ITERATIONS) { printk("%s: PHY read timeout\n", mp->dev->name); goto out; @@ -3014,9 +3021,9 @@ static void eth_port_read_smi_reg(struct mv643xx_private *mp, udelay(PHY_WAIT_MICRO_SECONDS); } - *value = rdl(mp, SMI_REG) & 0xffff; + *value = readl(smi_reg) & 0xffff; out: - spin_unlock_irqrestore(&mp->shared->phy_lock, flags); + spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags); } /* @@ -3042,17 +3049,16 @@ out: static void eth_port_write_smi_reg(struct mv643xx_private *mp, unsigned int phy_reg, unsigned int value) { - int phy_addr; - int i; + void __iomem *smi_reg = mp->shared_smi->eth_base + SMI_REG; + int phy_addr = ethernet_phy_get(mp); unsigned long flags; - - phy_addr = ethernet_phy_get(mp); + int i; /* the SMI register is a shared resource */ - spin_lock_irqsave(&mp->shared->phy_lock, flags); + spin_lock_irqsave(&mp->shared_smi->phy_lock, flags); /* wait for the SMI register to become available */ - for (i = 0; rdl(mp, SMI_REG) & ETH_SMI_BUSY; i++) { + for (i = 0; readl(smi_reg) & ETH_SMI_BUSY; i++) { if (i == PHY_WAIT_ITERATIONS) { printk("%s: PHY busy timeout\n", mp->dev->name); goto out; @@ -3060,10 +3066,10 @@ static void eth_port_write_smi_reg(struct mv643xx_private *mp, udelay(PHY_WAIT_MICRO_SECONDS); } - wrl(mp, SMI_REG, (phy_addr << 16) | (phy_reg << 21) | - ETH_SMI_OPCODE_WRITE | (value & 0xffff)); + writel((phy_addr << 16) | (phy_reg << 21) | + ETH_SMI_OPCODE_WRITE | (value & 0xffff), smi_reg); out: - spin_unlock_irqrestore(&mp->shared->phy_lock, flags); + spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags); } /* diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 66dc9571922a..a15cdd4a8e58 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -24,6 +24,8 @@ struct mv643xx_eth_platform_data { struct platform_device *shared; int port_number; + struct platform_device *shared_smi; + u16 force_phy_addr; /* force override if phy_addr == 0 */ u16 phy_addr; -- cgit v1.2.3 From 98db6f193c93e9b4729215af2c9101210e11d26c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 29 Apr 2008 22:38:48 +0200 Subject: x86: fix section mismatch in pci_scan_bus Fix following section mismatch warning: WARNING: vmlinux.o(.text+0x275616): Section mismatch in reference from the function pci_scan_bus() to the function .devinit.text:pci_scan_bus_parented() The warning was seen with a CONFIG_DEBUG_SECTION_MISMATCH=y build. The inline function pci_scan_bus refer to functions annotated __devinit - so annotate it __devinit too. This revealed a few x86 specific functions that were only used from __init or __devinit context. So annotate these __devinit and the warning was killed. The added include in pci.h was not strictly required but added to avoid being dependent on indirect includes. Signed-off-by: Sam Ravnborg Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 4 ++-- include/linux/pci.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2a4d751818b7..88b5416cf009 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -489,7 +489,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) +struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -512,7 +512,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) return bus; } -struct pci_bus *pci_scan_bus_with_sysdata(int busno) +struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) { return pci_scan_bus_on_node(busno, &pci_root_ops, -1); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 96acd0dae241..a59517b4930f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -474,7 +475,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr); void pci_bus_add_devices(struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); -static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, +static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *root_bus; -- cgit v1.2.3 From 70b9f7dc1435412ca2b89b13a8353bd9915a7189 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 28 Apr 2008 16:27:23 -0700 Subject: x86/pci: remove flag in pci_cfg_space_size_ext so let pci_cfg_space_size call it directly without flag. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/fixup.c | 2 +- drivers/pci/probe.c | 33 +++++++++++++++++---------------- include/linux/pci.h | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b60b2abd480c..ff3a6a336342 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, */ static void fam10h_pci_cfg_space_size(struct pci_dev *dev) { - dev->cfg_size = pci_cfg_space_size_ext(dev, 0); + dev->cfg_size = pci_cfg_space_size_ext(dev); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4a55bf380957..3706ce7972dd 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,13 +842,25 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) +int pci_cfg_space_size_ext(struct pci_dev *dev) { - int pos; u32 status; - if (!check_exp_pcix) - goto skip; + if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) + goto fail; + if (status == 0xffffffff) + goto fail; + + return PCI_CFG_SPACE_EXP_SIZE; + + fail: + return PCI_CFG_SPACE_SIZE; +} + +int pci_cfg_space_size(struct pci_dev *dev) +{ + int pos; + u32 status; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { @@ -861,23 +873,12 @@ int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) goto fail; } - skip: - if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) - goto fail; - if (status == 0xffffffff) - goto fail; - - return PCI_CFG_SPACE_EXP_SIZE; + return pci_cfg_space_size_ext(dev); fail: return PCI_CFG_SPACE_SIZE; } -int pci_cfg_space_size(struct pci_dev *dev) -{ - return pci_cfg_space_size_ext(dev, 1); -} - static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index a59517b4930f..509159bcd4e7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -667,7 +667,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); -int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); +int pci_cfg_space_size_ext(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -- cgit v1.2.3 From 4346f65426cbceb64794b468e4af6f5632d58c5e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 30 Apr 2008 23:04:37 +0200 Subject: hrtimer: remove duplicate helper function The helper function hrtimer_callback_running() is used in kernel/hrtimer.c as well as in the updated net/can/bcm.c which now supports hrtimers. Moving the helper function to hrtimer.h removes the duplicate definition in the C-files. Signed-off-by: Oliver Hartkopp Cc: David Miller Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 9 +++++++++ kernel/hrtimer.c | 9 --------- net/can/bcm.c | 6 ------ 3 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 31a4d653389f..6d93dce61cbb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -316,6 +316,15 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) (HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING); } +/* + * Helper function to check, whether the timer is running the callback + * function + */ +static inline int hrtimer_callback_running(struct hrtimer *timer) +{ + return timer->state & HRTIMER_STATE_CALLBACK; +} + /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9af1d6a8095e..421be5fe5cc7 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -153,15 +153,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) ktime_add(xtim, tomono); } -/* - * Helper function to check, whether the timer is running the callback - * function - */ -static inline int hrtimer_callback_running(struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_CALLBACK; -} - /* * Functions and macros which are different for UP/SMP systems are kept in a * single place diff --git a/net/can/bcm.c b/net/can/bcm.c index 74fd2d33aff4..d9a3a9d13bed 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -412,12 +412,6 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) bcm_send_to_user(op, &head, data, 1); } -/* TODO: move to linux/hrtimer.h */ -static inline int hrtimer_callback_running(struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_CALLBACK; -} - /* * bcm_rx_update_and_send - process a detected relevant receive content change * 1. update the last received data -- cgit v1.2.3 From d35c7b0e54a596c5a8134d75999b7f391a9c6550 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 3 May 2008 15:10:37 -0400 Subject: unified (weak) sys_pipe implementation This replaces the duplicated arch-specific versions of "sys_pipe()" with one unified implementation. This removes almost 250 lines of duplicated code. It's marked __weak, so that *if* an architecture wants to override the default implementation it can do so by simply having its own replacement version, since many architectures use alternate calling conventions for the 'pipe()' system call for legacy reasons (ie traditional UNIX implementations often return the two file descriptors in registers) I still haven't changed the cris version even though Linus says the BKL isn't needed. The arch maintainer can easily do it if there are really no obstacles. Signed-off-by: Ulrich Drepper Signed-off-by: Linus Torvalds --- arch/arm/kernel/sys_arm.c | 17 ----------------- arch/avr32/kernel/sys_avr32.c | 13 ------------- arch/blackfin/kernel/sys_bfin.c | 17 ----------------- arch/frv/kernel/sys_frv.c | 17 ----------------- arch/h8300/kernel/sys_h8300.c | 17 ----------------- arch/m68k/kernel/sys_m68k.c | 17 ----------------- arch/m68knommu/kernel/sys_m68k.c | 17 ----------------- arch/mn10300/kernel/sys_mn10300.c | 17 ----------------- arch/parisc/kernel/sys_parisc.c | 13 ------------- arch/powerpc/kernel/syscalls.c | 17 ----------------- arch/s390/kernel/sys_s390.c | 17 ----------------- arch/sh/kernel/sys_sh64.c | 17 ----------------- arch/um/kernel/syscall.c | 17 ----------------- arch/v850/kernel/syscalls.c | 17 ----------------- arch/x86/kernel/sys_i386_32.c | 17 ----------------- arch/x86/kernel/sys_x86_64.c | 17 ----------------- fs/pipe.c | 17 +++++++++++++++++ include/asm-powerpc/syscalls.h | 2 +- 18 files changed, 18 insertions(+), 265 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 9bd1870d980e..0128687ba0f7 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -34,23 +34,6 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 8deb6003ee62..8e8911e55c8f 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -14,19 +14,6 @@ #include #include -asmlinkage int sys_pipe(unsigned long __user *filedes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(filedes, fd, sizeof(fd))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, off_t offset) diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c index efb7b25a2633..fce49d7cf001 100644 --- a/arch/blackfin/kernel/sys_bfin.c +++ b/arch/blackfin/kernel/sys_bfin.c @@ -45,23 +45,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2(unsigned long addr, unsigned long len, diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index 04c6b1677ccf..49b2cf2c38f3 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -28,23 +28,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index 00608be6d567..2745656dcc52 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -27,23 +27,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index e892f17ba3fa..7f54efaf60bb 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -30,23 +30,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 65f7a95f056e..700281638629 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -28,23 +28,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index 5f17a1ebc825..bca5a84dc72c 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -28,23 +28,6 @@ #define MIN_MAP_ADDR PAGE_SIZE /* minimum fixed mmap address */ -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - /* * memory mapping syscall */ diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 4f589216b39e..71b31957c8f1 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -33,19 +33,6 @@ #include #include -int sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - static unsigned long get_unshared_area(unsigned long addr, unsigned long len) { struct vm_area_struct *vma; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index e722a4eeb5d0..4fe69ca24481 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -136,23 +136,6 @@ int sys_ipc(uint call, int first, unsigned long second, long third, return ret; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -int sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 988d0d64c2c8..5fdb799062b7 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -32,23 +32,6 @@ #include #include "entry.h" -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* common code for old and new mmaps */ static inline long do_mmap2( unsigned long addr, unsigned long len, diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 578004d71e02..91fb8445a5a0 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -30,23 +30,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - /* * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 9cffc628a37e..128ee85bc8d9 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -73,23 +73,6 @@ long old_mmap(unsigned long addr, unsigned long len, out: return err; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -long sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - long error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) - error = -EFAULT; - } - return error; -} - long sys_uname(struct old_utsname __user * name) { diff --git a/arch/v850/kernel/syscalls.c b/arch/v850/kernel/syscalls.c index 003db9c8c44a..1a83daf8e24f 100644 --- a/arch/v850/kernel/syscalls.c +++ b/arch/v850/kernel/syscalls.c @@ -132,23 +132,6 @@ sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth) return ret; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -int sys_pipe (int *fildes) -{ - int fd[2]; - int error; - - error = do_pipe (fd); - if (!error) { - if (copy_to_user (fildes, fd, 2*sizeof (int))) - error = -EFAULT; - } - return error; -} - static inline unsigned long do_mmap2 (unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index a86d26f036e1..d2ab52cc1d6b 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,23 +22,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index bd802a5e1aa3..3b360ef33817 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -17,23 +17,6 @@ #include #include -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) { diff --git a/fs/pipe.c b/fs/pipe.c index f73492b6817e..3499f9ff6316 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1075,6 +1075,23 @@ int do_pipe(int *fd) return error; } +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. + */ +asmlinkage long __weak sys_pipe(int __user *fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, sizeof(fd))) + error = -EFAULT; + } + return error; +} + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h index b3ca41fc8bb1..2b8a458f990a 100644 --- a/include/asm-powerpc/syscalls.h +++ b/include/asm-powerpc/syscalls.h @@ -30,7 +30,7 @@ asmlinkage int sys_fork(unsigned long p1, unsigned long p2, asmlinkage int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4, unsigned long p5, unsigned long p6, struct pt_regs *regs); -asmlinkage int sys_pipe(int __user *fildes); +asmlinkage long sys_pipe(int __user *fildes); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, struct sigaction __user *oact, size_t sigsetsize); -- cgit v1.2.3 From 41fef0ee7b8f3fe3f3dd2ddc9b170f3d88bce595 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sat, 3 May 2008 21:03:01 -0700 Subject: xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz Signed-off-by: David S. Miller --- include/net/xfrm.h | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d1350bcccb03..2933d7474a79 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -648,14 +648,46 @@ extern void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, extern void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb, u8 proto); #else -#define xfrm_audit_policy_add(x, r, a, se, s) do { ; } while (0) -#define xfrm_audit_policy_delete(x, r, a, se, s) do { ; } while (0) -#define xfrm_audit_state_add(x, r, a, se, s) do { ; } while (0) -#define xfrm_audit_state_delete(x, r, a, se, s) do { ; } while (0) -#define xfrm_audit_state_replay_overflow(x, s) do { ; } while (0) -#define xfrm_audit_state_notfound_simple(s, f) do { ; } while (0) -#define xfrm_audit_state_notfound(s, f, sp, sq) do { ; } while (0) -#define xfrm_audit_state_icvfail(x, s, p) do { ; } while (0) + +static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, + u32 auid, u32 ses, u32 secid) +{ +} + +static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, + u32 auid, u32 ses, u32 secid) +{ +} + +static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, + u32 auid, u32 ses, u32 secid) +{ +} + +static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, + u32 auid, u32 ses, u32 secid) +{ +} + +static inline void xfrm_audit_state_replay_overflow(struct xfrm_state *x, + struct sk_buff *skb) +{ +} + +static inline void xfrm_audit_state_notfound_simple(struct sk_buff *skb, + u16 family) +{ +} + +static inline void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, + __be32 net_spi, __be32 net_seq) +{ +} + +static inline void xfrm_audit_state_icvfail(struct xfrm_state *x, + struct sk_buff *skb, u8 proto) +{ +} #endif /* CONFIG_AUDITSYSCALL */ static inline void xfrm_pol_hold(struct xfrm_policy *policy) -- cgit v1.2.3 From 67253af52e9133fb4cfbf7a2448a2d3524d1fa6c Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 10:20:22 +0800 Subject: KVM: Add kvm_x86_ops get_tdp_level() The function get_tdp_level() provided the number of tdp level for EPT and NPT rather than the NPT specific macro. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/mmu.h | 6 ------ arch/x86/kvm/svm.c | 10 ++++++++++ arch/x86/kvm/vmx.c | 6 ++++++ arch/x86/kvm/vmx.h | 1 + include/asm-x86/kvm_host.h | 1 + 6 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bcfaf7e4a2dc..20fb3c852db7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1343,7 +1343,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, - largepage, gfn, pfn, TDP_ROOT_LEVEL); + largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -1450,7 +1450,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->page_fault = tdp_page_fault; context->free = nonpaging_free; context->prefetch_page = nonpaging_prefetch_page; - context->shadow_root_level = TDP_ROOT_LEVEL; + context->shadow_root_level = kvm_x86_ops->get_tdp_level(); context->root_hpa = INVALID_PAGE; if (!is_paging(vcpu)) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index a4fcb78ebf42..1730757bbc7a 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -3,12 +3,6 @@ #include -#ifdef CONFIG_X86_64 -#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL -#else -#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL -#endif - #define PT64_PT_BITS 9 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) #define PT32_PT_BITS 10 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 89e0be2c10d0..ab22615eee89 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } +static int get_npt_level(void) +{ +#ifdef CONFIG_X86_64 + return PT64_ROOT_LEVEL; +#else + return PT32E_ROOT_LEVEL; +#endif +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = { .inject_pending_vectors = do_interrupt_requests, .set_tss_addr = svm_set_tss_addr, + .get_tdp_level = get_npt_level, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d93250d11caa..98e4f2b036de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2788,6 +2788,11 @@ static void __init vmx_check_processor_compat(void *rtn) } } +static int get_ept_level(void) +{ + return VMX_EPT_DEFAULT_GAW + 1; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -2844,6 +2849,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .inject_pending_vectors = do_interrupt_requests, .set_tss_addr = vmx_set_tss_addr, + .get_tdp_level = get_ept_level, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 5f7fdc965d39..093b085daf6a 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -351,5 +351,6 @@ enum vmcs_field { #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) +#define VMX_EPT_DEFAULT_GAW 3 #endif diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 9d963cd6533c..897a1be24cf7 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_x86_ops { struct kvm_run *run); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*get_tdp_level)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; -- cgit v1.2.3 From 7b52345e2c4c7333bf7eba8034ffc4683fa63c91 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:13:50 +0800 Subject: KVM: MMU: Add EPT support Enable kvm_set_spte() to generate EPT entries. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 43 +++++++++++++++++++++++++++++++++---------- arch/x86/kvm/x86.c | 3 +++ include/asm-x86/kvm_host.h | 3 +++ 3 files changed, 39 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 20fb3c852db7..c28a36b4cbba 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -152,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache; static u64 __read_mostly shadow_trap_nonpresent_pte; static u64 __read_mostly shadow_notrap_nonpresent_pte; +static u64 __read_mostly shadow_base_present_pte; +static u64 __read_mostly shadow_nx_mask; +static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ +static u64 __read_mostly shadow_user_mask; +static u64 __read_mostly shadow_accessed_mask; +static u64 __read_mostly shadow_dirty_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -160,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) } EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); +void kvm_mmu_set_base_ptes(u64 base_pte) +{ + shadow_base_present_pte = base_pte; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); + +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask) +{ + shadow_user_mask = user_mask; + shadow_accessed_mask = accessed_mask; + shadow_dirty_mask = dirty_mask; + shadow_nx_mask = nx_mask; + shadow_x_mask = x_mask; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + static int is_write_protection(struct kvm_vcpu *vcpu) { return vcpu->arch.cr0 & X86_CR0_WP; @@ -198,7 +221,7 @@ static int is_writeble_pte(unsigned long pte) static int is_dirty_pte(unsigned long pte) { - return pte & PT_DIRTY_MASK; + return pte & shadow_dirty_mask; } static int is_rmap_pte(u64 pte) @@ -513,7 +536,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) return; sp = page_header(__pa(spte)); pfn = spte_to_pfn(*spte); - if (*spte & PT_ACCESSED_MASK) + if (*spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) kvm_release_pfn_dirty(pfn); @@ -1039,17 +1062,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, * whether the guest actually used the pte (in order to detect * demand paging). */ - spte = PT_PRESENT_MASK | PT_DIRTY_MASK; + spte = shadow_base_present_pte | shadow_dirty_mask; if (!speculative) pte_access |= PT_ACCESSED_MASK; if (!dirty) pte_access &= ~ACC_WRITE_MASK; - if (!(pte_access & ACC_EXEC_MASK)) - spte |= PT64_NX_MASK; - - spte |= PT_PRESENT_MASK; + if (pte_access & ACC_EXEC_MASK) + spte |= shadow_x_mask; + else + spte |= shadow_nx_mask; if (pte_access & ACC_USER_MASK) - spte |= PT_USER_MASK; + spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; @@ -1155,7 +1178,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, } table[index] = __pa(new_table->spt) | PT_PRESENT_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; + | PT_WRITABLE_MASK | shadow_user_mask; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } @@ -1599,7 +1622,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) { u64 *spte = vcpu->arch.last_pte_updated; - return !!(spte && (*spte & PT_ACCESSED_MASK)); + return !!(spte && (*spte & shadow_accessed_mask)); } static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce556372a4d..0735efbfa712 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque) kvm_x86_ops = ops; kvm_mmu_set_nonpresent_ptes(0ull, 0ull); + kvm_mmu_set_base_ptes(PT_PRESENT_MASK); + kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, + PT_DIRTY_MASK, PT64_NX_MASK, 0); return 0; out: diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 897a1be24cf7..d1dedda958ff 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -434,6 +434,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_mmu_set_base_ptes(u64 base_pte); +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); -- cgit v1.2.3 From b7ebfb0509692cd923e31650f81ed4d79c9a3e59 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:44:52 +0800 Subject: KVM: VMX: Prepare an identity page table for EPT in real mode [aliguory: plug leak] Signed-off-by: Sheng Yang Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.h | 3 ++ arch/x86/kvm/x86.c | 2 ++ include/asm-x86/kvm_host.h | 3 ++ 4 files changed, 84 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e4f2b036de..de5f6150f2f7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -87,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_vmx, vcpu); } -static int init_rmode_tss(struct kvm *kvm); +static int init_rmode(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1304,7 +1304,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); kvm_mmu_reset_context(vcpu); - init_rmode_tss(vcpu->kvm); + init_rmode(vcpu->kvm); } #ifdef CONFIG_X86_64 @@ -1578,6 +1578,41 @@ out: return ret; } +static int init_rmode_identity_map(struct kvm *kvm) +{ + int i, r, ret; + pfn_t identity_map_pfn; + u32 tmp; + + if (!vm_need_ept()) + return 1; + if (unlikely(!kvm->arch.ept_identity_pagetable)) { + printk(KERN_ERR "EPT: identity-mapping pagetable " + "haven't been allocated!\n"); + return 0; + } + if (likely(kvm->arch.ept_identity_pagetable_done)) + return 1; + ret = 0; + identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; + r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); + if (r < 0) + goto out; + /* Set up identity-mapping pagetable for EPT in real mode */ + for (i = 0; i < PT32_ENT_PER_PAGE; i++) { + tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | + _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); + r = kvm_write_guest_page(kvm, identity_map_pfn, + &tmp, i * sizeof(tmp), sizeof(tmp)); + if (r < 0) + goto out; + } + kvm->arch.ept_identity_pagetable_done = true; + ret = 1; +out: + return ret; +} + static void seg_setup(int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1612,6 +1647,31 @@ out: return r; } +static int alloc_identity_pagetable(struct kvm *kvm) +{ + struct kvm_userspace_memory_region kvm_userspace_mem; + int r = 0; + + down_write(&kvm->slots_lock); + if (kvm->arch.ept_identity_pagetable) + goto out; + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; + kvm_userspace_mem.flags = 0; + kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + kvm_userspace_mem.memory_size = PAGE_SIZE; + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + if (r) + goto out; + + down_read(¤t->mm->mmap_sem); + kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, + VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); +out: + up_write(&kvm->slots_lock); + return r; +} + static void allocate_vpid(struct vcpu_vmx *vmx) { int vpid; @@ -1775,6 +1835,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } +static int init_rmode(struct kvm *kvm) +{ + if (!init_rmode_tss(kvm)) + return 0; + if (!init_rmode_identity_map(kvm)) + return 0; + return 1; +} + static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1782,7 +1851,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) int ret; down_read(&vcpu->kvm->slots_lock); - if (!init_rmode_tss(vmx->vcpu.kvm)) { + if (!init_rmode(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; } @@ -2759,6 +2828,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (alloc_apic_access_page(kvm) != 0) goto free_vmcs; + if (vm_need_ept()) + if (alloc_identity_pagetable(kvm) != 0) + goto free_vmcs; + return &vmx->vcpu; free_vmcs: diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 093b085daf6a..f97eccc754e8 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -340,6 +340,7 @@ enum vmcs_field { #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -353,4 +354,6 @@ enum vmcs_field { #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0735efbfa712..1842a86f7c33 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3909,6 +3909,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_physmem(kvm); if (kvm->arch.apic_access_page) put_page(kvm->arch.apic_access_page); + if (kvm->arch.ept_identity_pagetable) + put_page(kvm->arch.ept_identity_pagetable); kfree(kvm); } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d1dedda958ff..e24afaa64a4d 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -314,6 +314,9 @@ struct kvm_arch{ struct page *apic_access_page; gpa_t wall_clock; + + struct page *ept_identity_pagetable; + bool ept_identity_pagetable_done; }; struct kvm_vm_stat { -- cgit v1.2.3 From 1439442c7b257b47a83aea4daed8fbf4a32cdff9 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 28 Apr 2008 12:24:45 +0800 Subject: KVM: VMX: Enable EPT feature for KVM Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 +- arch/x86/kvm/vmx.c | 229 +++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.h | 9 ++ include/asm-x86/kvm_host.h | 1 + 4 files changed, 234 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3dbedf169730..d9344be36442 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1177,8 +1177,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, return -ENOMEM; } - table[index] = __pa(new_table->spt) | PT_PRESENT_MASK - | PT_WRITABLE_MASK | shadow_user_mask; + table[index] = __pa(new_table->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index de5f6150f2f7..bfe4db11989c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -42,7 +42,7 @@ module_param(enable_vpid, bool, 0); static int flexpriority_enabled = 1; module_param(flexpriority_enabled, bool, 0); -static int enable_ept; +static int enable_ept = 1; module_param(enable_ept, bool, 0); struct vmcs { @@ -284,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) : : "a"(&operand), "c"(ext) : "cc", "memory"); } +static inline void __invept(int ext, u64 eptp, gpa_t gpa) +{ + struct { + u64 eptp, gpa; + } operand = {eptp, gpa}; + + asm volatile (ASM_VMX_INVEPT + /* CF==1 or ZF==1 --> rc = -1 */ + "; ja 1f ; ud2 ; 1:\n" + : : "a" (&operand), "c" (ext) : "cc", "memory"); +} + static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -335,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); } +static inline void ept_sync_global(void) +{ + if (cpu_has_vmx_invept_global()) + __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); +} + +static inline void ept_sync_context(u64 eptp) +{ + if (vm_need_ept()) { + if (cpu_has_vmx_invept_context()) + __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); + else + ept_sync_global(); + } +} + +static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) +{ + if (vm_need_ept()) { + if (cpu_has_vmx_invept_individual_addr()) + __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, + eptp, gpa); + else + ept_sync_context(eptp); + } +} + static unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -422,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb |= 1u << 1; if (vcpu->arch.rmode.active) eb = ~0; + if (vm_need_ept()) + eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -1352,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; } +static void ept_load_pdptrs(struct kvm_vcpu *vcpu) +{ + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { + printk(KERN_ERR "EPT: Fail to load pdptrs!\n"); + return; + } + vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]); + } +} + +static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); + +static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, + unsigned long cr0, + struct kvm_vcpu *vcpu) +{ + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl | + (CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING)); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, vcpu->arch.cr4); + *hw_cr0 |= X86_CR0_PE | X86_CR0_PG; + *hw_cr0 &= ~X86_CR0_WP; + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl & + ~(CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING)); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, vcpu->arch.cr4); + if (!(vcpu->arch.cr0 & X86_CR0_WP)) + *hw_cr0 &= ~X86_CR0_WP; + } +} + +static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, + struct kvm_vcpu *vcpu) +{ + if (!is_paging(vcpu)) { + *hw_cr4 &= ~X86_CR4_PAE; + *hw_cr4 |= X86_CR4_PSE; + } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) + *hw_cr4 &= ~X86_CR4_PAE; +} + static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | + KVM_VM_CR0_ALWAYS_ON; + vmx_fpu_deactivate(vcpu); if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) @@ -1371,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif + if (vm_need_ept()) + ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, - (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); + vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) vmx_fpu_activate(vcpu); } +static u64 construct_eptp(unsigned long root_hpa) +{ + u64 eptp; + + /* TODO write the value reading from MSR */ + eptp = VMX_EPT_DEFAULT_MT | + VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; + eptp |= (root_hpa & PAGE_MASK); + + return eptp; +} + static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + unsigned long guest_cr3; + u64 eptp; + + guest_cr3 = cr3; + if (vm_need_ept()) { + eptp = construct_eptp(cr3); + vmcs_write64(EPT_POINTER, eptp); + ept_sync_context(eptp); + ept_load_pdptrs(vcpu); + guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : + VMX_EPT_IDENTITY_PAGETABLE_ADDR; + } + vmx_flush_tlb(vcpu); - vmcs_writel(GUEST_CR3, cr3); + vmcs_writel(GUEST_CR3, guest_cr3); if (vcpu->arch.cr0 & X86_CR0_PE) vmx_fpu_deactivate(vcpu); } static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); + unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? + KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + vcpu->arch.cr4 = cr4; + if (vm_need_ept()) + ept_update_paging_mode_cr4(&hw_cr4, vcpu); + + vmcs_writel(CR4_READ_SHADOW, cr4); + vmcs_writel(GUEST_CR4, hw_cr4); } static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -2116,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { + /* EPT won't cause page fault directly */ + if (vm_need_ept()) + BUG(); cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); @@ -2445,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return kvm_task_switch(vcpu, tss_selector, reason); } +static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + enum emulation_result er; + gpa_t gpa; + unsigned long hva; + int gla_validity; + int r; + + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + + if (exit_qualification & (1 << 6)) { + printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); + return -ENOTSUPP; + } + + gla_validity = (exit_qualification >> 7) & 0x3; + if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { + printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); + printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", + (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), + (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", + (long unsigned int)exit_qualification); + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 0; + return -ENOTSUPP; + } + + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); + if (!kvm_is_error_hva(hva)) { + r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); + if (r < 0) { + printk(KERN_ERR "EPT: Not enough memory!\n"); + return -ENOMEM; + } + return 1; + } else { + /* must be MMIO */ + er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + + if (er == EMULATE_FAIL) { + printk(KERN_ERR + "EPT: Fail to handle EPT violation vmexit!er is %d\n", + er); + printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", + (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), + (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", + (long unsigned int)exit_qualification); + return -ENOTSUPP; + } else if (er == EMULATE_DO_MMIO) + return 0; + } + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2468,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, + [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, }; static const int kvm_vmx_max_exit_handlers = @@ -2486,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); + /* Access CR3 don't cause VMExit in paging mode, so we need + * to sync with guest real CR3. */ + if (vm_need_ept() && is_paging(vcpu)) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + ept_load_pdptrs(vcpu); + } + if (unlikely(vmx->fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2494,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if ((vectoring_info & VECTORING_INFO_VALID_MASK) && - exit_reason != EXIT_REASON_EXCEPTION_NMI) + (exit_reason != EXIT_REASON_EXCEPTION_NMI && + exit_reason != EXIT_REASON_EPT_VIOLATION)) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers @@ -2796,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(-ENOMEM); allocate_vpid(vmx); + if (id == 0 && vm_need_ept()) { + kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | + VMX_EPT_WRITABLE_MASK | + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, + VMX_EPT_FAKE_DIRTY_MASK, 0ull, + VMX_EPT_EXECUTABLE_MASK); + kvm_enable_tdp(); + } err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) @@ -2975,9 +3183,14 @@ static int __init vmx_init(void) vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); + if (cpu_has_vmx_ept()) + bypass_guest_pf = 0; + if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); + ept_sync_global(); + return 0; out2: diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index f97eccc754e8..79d94c610dfe 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -353,6 +353,15 @@ enum vmcs_field { #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_MAX_GAW 0x4 +#define VMX_EPT_MT_EPTE_SHIFT 3 +#define VMX_EPT_GAW_EPTP_SHIFT 3 +#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPT_READABLE_MASK 0x1ull +#define VMX_EPT_WRITABLE_MASK 0x2ull +#define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) +#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index e24afaa64a4d..4baa9c9e1f41 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -651,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" #define MSR_IA32_TIME_STAMP_COUNTER 0x010 -- cgit v1.2.3 From 45c5eb67da5a668abe79c23a7e64dbc87a600f90 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Fri, 25 Apr 2008 17:55:49 -0500 Subject: KVM: ppc: Handle guest idle by emulating MSR[WE] writes This reduces host CPU usage when the guest is idle. However, the guest must set MSR[WE] in its idle loop, which Linux did not do until 2.6.26. Signed-off-by: Hollis Blanchard Signed-off-by: Jerone Young Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke_guest.c | 1 + arch/powerpc/kvm/powerpc.c | 20 +++++++++++++++++--- include/asm-powerpc/kvm_host.h | 1 + include/asm-powerpc/kvm_ppc.h | 5 +++++ 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c index 6d9884a6884a..b3db6f4576ad 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c @@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "inst_emu", VCPU_STAT(emulated_inst_exits) }, { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { NULL } }; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index bad40bd2d3ac..777e0f34e0ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* XXX implement me */ - return 0; + return !!(v->arch.pending_exceptions); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return 1; + return !(v->arch.msr & MSR_WE); } @@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data) struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int r; sigset_t sigsaved; + vcpu_load(vcpu); + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); + vcpu_put(vcpu); + return r; } int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } + return 0; } diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h index 04ffbb8e0a35..81a69d711017 100644 --- a/include/asm-powerpc/kvm_host.h +++ b/include/asm-powerpc/kvm_host.h @@ -59,6 +59,7 @@ struct kvm_vcpu_stat { u32 emulated_inst_exits; u32 dec_exits; u32 ext_intr_exits; + u32 halt_wakeup; }; struct tlbe { diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h index 7ac820308a7e..b35a7e3ef978 100644 --- a/include/asm-powerpc/kvm_ppc.h +++ b/include/asm-powerpc/kvm_ppc.h @@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) clear_bit(priority, &vcpu->arch.pending_exceptions); } +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) + kvm_vcpu_block(vcpu); } #endif /* __POWERPC_KVM_PPC_H__ */ -- cgit v1.2.3 From bc1a34f1bf354fabc03e3f465620c80e510d0e8f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 1 May 2008 18:43:33 +0200 Subject: KVM: avoid fx_init() schedule in atomic This make sure not to schedule in atomic during fx_init. I also changed the name of fpu_init to fx_finit to avoid duplicating the name with fpu_init that is already used in the kernel, this makes grep simpler if nothing else. Signed-off-by: Andrea Arcangeli Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 11 ++++++++++- include/asm-x86/kvm_host.h | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bc224bba1e87..21338bdb28ff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3703,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu) { unsigned after_mxcsr_mask; + /* + * Touch the fpu the first time in non atomic context as if + * this is the first fpu instruction the exception handler + * will fire before the instruction returns and it'll have to + * allocate ram with GFP_KERNEL. + */ + if (!used_math()) + fx_save(&vcpu->arch.host_fx_image); + /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); fx_save(&vcpu->arch.host_fx_image); - fpu_init(); + fx_finit(); fx_save(&vcpu->arch.guest_fx_image); fx_restore(&vcpu->arch.host_fx_image); preempt_enable(); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 4baa9c9e1f41..1d8cd01fa514 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -627,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image) asm("fxrstor (%0)":: "r" (image)); } -static inline void fpu_init(void) +static inline void fx_finit(void) { asm("finit"); } -- cgit v1.2.3 From afaafe50ee15c59010f19273ebfb6c44f0962d7c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 2 May 2008 21:14:20 +1000 Subject: x86: fix up bootparam.h for userspace inclusion commit 8b664aa66e824a0ddf4ec56d41fa0cf7bb374de6 (x86, boot: add linked list of struct setup_data) put a new struct in bootparam.h, but didn't use the userspace-safe types. Signed-off-by: Rusty Russell Cc: Huang Ying Acked-by: H. Peter Anvin Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-x86/bootparam.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index e8659909e5f6..f62f4733606b 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -14,10 +14,10 @@ /* extensible setup data list node */ struct setup_data { - u64 next; - u32 type; - u32 len; - u8 data[0]; + __u64 next; + __u32 type; + __u32 len; + __u8 data[0]; }; struct setup_header { -- cgit v1.2.3 From 36bbfe2f097d5e09e8e9c83f55264bd538a0ebe1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 3 May 2008 23:51:03 +0300 Subject: fix asm-alpha/types.h breakage This patch fixes the following compile error on alpha caused by commit 3726c23df8e4d95b6f2b335dfa90e3f4850a8a00 (alpha: types: use for the alpha architecture): <-- snip --> ... CC arch/alpha/kernel/asm-offsets.s In file included from include2/asm/topology.h:6, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/topology.h:34, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/mmzone.h:683, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/gfp.h:4, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/slab.h:12, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/percpu.h:5, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/rcupdate.h:39, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/pid.h:4, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/sched.h:74, from /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/alpha/kernel/asm-offsets.c:9: include2/asm/machvec.h:44: error: expected declaration specifiers or '...' before 'dma_addr_t' include2/asm/machvec.h:44: error: expected declaration specifiers or '...' before 'dma_addr_t' In file included from /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/alpha/kernel/asm-offsets.c:12: include2/asm/io.h:94: warning: type defaults to 'int' in declaration of 'dma_addr_t' include2/asm/io.h:94: warning: variable 'dma_addr_t' declared 'inline' include2/asm/io.h:94: error: expected ',' or ';' before 'isa_page_to_bus' make[2]: *** [arch/alpha/kernel/asm-offsets.s] Error 1 <-- snip --> Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: H. Peter Anvin --- include/asm-alpha/types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-alpha/types.h b/include/asm-alpha/types.h index a9e34ca4d463..c1541353ccef 100644 --- a/include/asm-alpha/types.h +++ b/include/asm-alpha/types.h @@ -23,5 +23,11 @@ typedef unsigned int umode_t; #define BITS_PER_LONG 64 +#ifndef __ASSEMBLY__ + +typedef u64 dma_addr_t; +typedef u64 dma64_addr_t; + +#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ALPHA_TYPES_H */ -- cgit v1.2.3 From 2961b423037da60a8cb230963ee0d8c04473d73b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 3 May 2008 22:26:17 +0300 Subject: fix asm-mips/types.h syntax error This patch fixes the following compile error caused by commit 23cf11ddb5099f8c7f7cb3eb154bff0faf31cae9 (mips: types: use for the mips architecture): <-- snip --> ... CC kernel/bounds.s In file included from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/types.h:12, from /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/page-flags.h:8, from /home/bunk/linux/kernel-2.6/git/linux-2.6/kernel/bounds.c:9: include2/asm/types.h:56:2: error: #endif without #if make[2]: *** [kernel/bounds.s] Error 1 <-- snip --> Signed-off-by: Adrian Bunk Cc: Ralf Baechle Signed-off-by: H. Peter Anvin --- include/asm-mips/types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-mips/types.h b/include/asm-mips/types.h index 7a2ee4f40131..bcbb8d675af5 100644 --- a/include/asm-mips/types.h +++ b/include/asm-mips/types.h @@ -19,8 +19,6 @@ typedef unsigned short umode_t; -#endif - #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From e73b65f1db7e3baa3db43951476b7d2d2381ba35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 4 May 2008 09:29:43 +0200 Subject: sysfs: build fix x86.git testing found the following build failure on v2.6.26-rc1: In file included from include/linux/kobject.h:22, from include/linux/module.h:17, from include/linux/crypto.h:22, from arch/x86/kernel/asm-offsets_32.c:8, from arch/x86/kernel/asm-offsets.c:3: include/linux/sysfs.h:201: error: redefinition of 'sysfs_update_group' include/linux/sysfs.h:195: error: previous definition of 'sysfs_update_group' was here make[1]: *** [arch/x86/kernel/asm-offsets.s] Error 1 make: *** [prepare0] Error 2 with the following config: http://redhat.com/~mingo/misc/config-Sun_May__4_07_09_30_CEST_2008.bad the reason for the build failure is the duplicate definition of the sysfs_update_group() inline function in include/linux/sysfs.h. The duplication was a merge error: it was added via -mm by commit v2.6.25-7262-g2850699, "sysfs: sysfs_update_group stub for CONFIG_SYSFS=n" a day before v2.6.26-rc1, but a day before that the same commit was already merged upstream via the sysfs tree, with commit v2.6.25-7211-g1cbfb7a. Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/sysfs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 27bad59dae79..7858eac40aa7 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -196,12 +196,6 @@ static inline int sysfs_update_group(struct kobject *kobj, return 0; } -static inline int sysfs_update_group(struct kobject *kobj, - const struct attribute_group *grp) -{ - return 0; -} - static inline void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp) { -- cgit v1.2.3 From 0bbeafd0118fc3ae54990064760c889d41dc21d6 Mon Sep 17 00:00:00 2001 From: Satoru SATOH Date: Sun, 4 May 2008 22:12:43 -0700 Subject: ip: Make use of the inline function dst_metric_locked() Signed-off-by: Satoru SATOH Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 6d7bcd5e62d4..3b40bc2234be 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -210,7 +210,7 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric(dst, RTAX_LOCK)&(1<u.dst.metrics)); if (fi->fib_mtu == 0) { rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) && + if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && rt->u.dst.dev->mtu > 576) rt->u.dst.metrics[RTAX_MTU-1] = 576; -- cgit v1.2.3 From b41e5fffe8b81fc939067d8c1c195cc79115d5a3 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Sat, 3 May 2008 06:34:04 +1000 Subject: [POWERPC] devres: Add devm_ioremap_prot() We provide an ioremap_flags, so this provides a corresponding devm_ioremap_prot. The slight name difference is at Ben Herrenschmidt's request as he plans on changing ioremap_flags to ioremap_prot in the future. Signed-off-by: Emil Medve Signed-off-by: Kumar Gala Acked-by: Tejun Heo Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/devres.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/asm-powerpc/io.h | 8 +++++++- include/linux/io.h | 1 + lib/devres.c | 2 +- 5 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/lib/devres.c (limited to 'include') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 4bb023f4c869..f1d2cdc5331b 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_SMP) += locks.o endif obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o +obj-$(CONFIG_HAS_IOMEM) += devres.o diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c new file mode 100644 index 000000000000..292115d98ea9 --- /dev/null +++ b/arch/powerpc/lib/devres.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include /* devres_*(), devm_ioremap_release() */ +#include /* ioremap_flags() */ +#include /* EXPORT_SYMBOL() */ + +/** + * devm_ioremap_prot - Managed ioremap_flags() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * @flags: Page flags + * + * Managed ioremap_prot(). Map is automatically unmapped on driver + * detach. + */ +void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_flags(offset, size, flags); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_prot); diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index afae0697e8ce..e0062d73db1c 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -2,7 +2,7 @@ #define _ASM_POWERPC_IO_H #ifdef __KERNEL__ -/* +/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -18,6 +18,9 @@ extern int check_legacy_ioport(unsigned long base_port); #define _PNPWRP 0xa79 #define PNPBIOS_BASE 0xf000 +#include +#include + #include #include #include @@ -744,6 +747,9 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) +void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags); + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/include/linux/io.h b/include/linux/io.h index 3a03a3604cce..6c7f0ba0d5fa 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -65,5 +65,6 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, void devm_iounmap(struct device *dev, void __iomem *addr); int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); +void devm_ioremap_release(struct device *dev, void *res); #endif /* _LINUX_IO_H */ diff --git a/lib/devres.c b/lib/devres.c index 26c87c49d776..72c8909006da 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -2,7 +2,7 @@ #include #include -static void devm_ioremap_release(struct device *dev, void *res) +void devm_ioremap_release(struct device *dev, void *res) { iounmap(*(void __iomem **)res); } -- cgit v1.2.3 From 688b744d8bc84dc5cc646e97509113dc5e8818ed Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 24 Apr 2008 16:57:23 -0500 Subject: kgdb: fix signedness mixmatches, add statics, add declaration to header Noticed by sparse: arch/x86/kernel/kgdb.c:556:15: warning: symbol 'kgdb_arch_pc' was not declared. Should it be static? kernel/kgdb.c:149:8: warning: symbol 'kgdb_do_roundup' was not declared. Should it be static? kernel/kgdb.c:193:22: warning: symbol 'kgdb_arch_pc' was not declared. Should it be static? kernel/kgdb.c:712:5: warning: symbol 'remove_all_break' was not declared. Should it be static? Related to kgdb_hex2long: arch/x86/kernel/kgdb.c:371:28: warning: incorrect type in argument 2 (different signedness) arch/x86/kernel/kgdb.c:371:28: expected long *long_val arch/x86/kernel/kgdb.c:371:28: got unsigned long * kernel/kgdb.c:469:27: warning: incorrect type in argument 2 (different signedness) kernel/kgdb.c:469:27: expected long *long_val kernel/kgdb.c:469:27: got unsigned long * kernel/kgdb.c:470:27: warning: incorrect type in argument 2 (different signedness) kernel/kgdb.c:470:27: expected long *long_val kernel/kgdb.c:470:27: got unsigned long * kernel/kgdb.c:894:27: warning: incorrect type in argument 2 (different signedness) kernel/kgdb.c:894:27: expected long *long_val kernel/kgdb.c:894:27: got unsigned long * kernel/kgdb.c:895:27: warning: incorrect type in argument 2 (different signedness) kernel/kgdb.c:895:27: expected long *long_val kernel/kgdb.c:895:27: got unsigned long * kernel/kgdb.c:1127:28: warning: incorrect type in argument 2 (different signedness) kernel/kgdb.c:1127:28: expected long *long_val kernel/kgdb.c:1127:28: got unsigned long * kernel/kgdb.c:1132:25: warning: incorrect type in argument 2 (different signedness) kernel/kgdb.c:1132:25: expected long *long_val kernel/kgdb.c:1132:25: got unsigned long * Signed-off-by: Harvey Harrison Signed-off-by: Jason Wessel --- include/linux/kgdb.h | 4 +++- kernel/kgdb.c | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 9757b1a6d9dc..6adcc297e354 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -261,10 +261,12 @@ struct kgdb_io { extern struct kgdb_arch arch_kgdb_ops; +extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs); + extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); -extern int kgdb_hex2long(char **ptr, long *long_val); +extern int kgdb_hex2long(char **ptr, unsigned long *long_val); extern int kgdb_mem2hex(char *mem, char *buf, int count); extern int kgdb_hex2mem(char *buf, char *mem, int count); diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 1bd0ec1c80b2..39e31a036f5b 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -61,7 +61,7 @@ struct kgdb_state { int err_code; int cpu; int pass_exception; - long threadid; + unsigned long threadid; long kgdb_usethreadid; struct pt_regs *linux_regs; }; @@ -146,7 +146,7 @@ atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1); * the other CPUs might interfere with your debugging context, so * use this with care: */ -int kgdb_do_roundup = 1; +static int kgdb_do_roundup = 1; static int __init opt_nokgdbroundup(char *str) { @@ -438,7 +438,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count) * While we find nice hex chars, build a long_val. * Return number of chars processed. */ -int kgdb_hex2long(char **ptr, long *long_val) +int kgdb_hex2long(char **ptr, unsigned long *long_val) { int hex_val; int num = 0; @@ -709,7 +709,7 @@ int kgdb_isremovedbreak(unsigned long addr) return 0; } -int remove_all_break(void) +static int remove_all_break(void) { unsigned long addr; int error; -- cgit v1.2.3 From b6d9d267f0d68104df910fca89149803aec82426 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 5 May 2008 21:26:15 +0200 Subject: m68k: remove old mac_esp cruft Remove the rest of the old mac_esp driver. Also ditch the rest of the machw mechanism, it needs to be replaced by a fake openfirmware tree. Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 --- arch/m68k/mac/config.c | 24 ------------------------ include/asm-m68k/machw.h | 30 ------------------------------ 3 files changed, 57 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a3c35446e755..b2dd3dc32fd3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1094,9 +1094,6 @@ and is between 256 and 4096 characters. It is defined in the file mac5380= [HW,SCSI] Format: ,,,, - mac53c9x= [HW,SCSI] Format: - ,,,,,,, - machvec= [IA64] Force the use of a particular machine-vector (machvec) in a generic kernel. Example: machvec=hpzx1_swiotlb diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 735a49b4b936..ad3e3bacae39 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -48,9 +48,6 @@ struct mac_booter_data mac_bi_data; int mac_bisize = sizeof mac_bi_data; -struct mac_hw_present mac_hw_present; -EXPORT_SYMBOL(mac_hw_present); - /* New m68k bootinfo stuff and videobase */ extern int m68k_num_memory; @@ -817,27 +814,6 @@ void __init mac_identify(void) m68k_ramdisk.addr, m68k_ramdisk.size); #endif - /* - * TODO: set the various fields in macintosh_config->hw_present here! - */ - switch (macintosh_config->scsi_type) { - case MAC_SCSI_OLD: - MACHW_SET(MAC_SCSI_80); - break; - case MAC_SCSI_QUADRA: - case MAC_SCSI_QUADRA2: - case MAC_SCSI_QUADRA3: - MACHW_SET(MAC_SCSI_96); - if ((macintosh_config->ident == MAC_MODEL_Q900) || - (macintosh_config->ident == MAC_MODEL_Q950)) - MACHW_SET(MAC_SCSI_96_2); - break; - default: - printk(KERN_WARNING "config.c: wtf: unknown scsi, using 53c80\n"); - MACHW_SET(MAC_SCSI_80); - break; - } - iop_init(); via_init(); oss_init(); diff --git a/include/asm-m68k/machw.h b/include/asm-m68k/machw.h index d2e0e25d5c90..35624998291c 100644 --- a/include/asm-m68k/machw.h +++ b/include/asm-m68k/machw.h @@ -66,36 +66,6 @@ struct MAC_SCC # define mac_scc ((*(volatile struct SCC*)MAC_SCC_BAS)) #endif -/* hardware stuff */ - -#define MACHW_DECLARE(name) unsigned name : 1 -#define MACHW_SET(name) (mac_hw_present.name = 1) -#define MACHW_PRESENT(name) (mac_hw_present.name) - -struct mac_hw_present { - /* video hardware */ - /* sound hardware */ - /* disk storage interfaces */ - MACHW_DECLARE(MAC_SCSI_80); /* Directly mapped NCR5380 */ - MACHW_DECLARE(MAC_SCSI_96); /* 53c9[46] */ - MACHW_DECLARE(MAC_SCSI_96_2); /* 2nd 53c9[46] Q900 and Q950 */ - MACHW_DECLARE(IDE); /* IDE Interface */ - /* other I/O hardware */ - MACHW_DECLARE(SCC); /* Serial Communications Contr. */ - /* DMA */ - MACHW_DECLARE(SCSI_DMA); /* DMA for the NCR5380 */ - /* real time clocks */ - MACHW_DECLARE(RTC_CLK); /* clock chip */ - /* supporting hardware */ - MACHW_DECLARE(VIA1); /* Versatile Interface Ad. 1 */ - MACHW_DECLARE(VIA2); /* Versatile Interface Ad. 2 */ - MACHW_DECLARE(RBV); /* Versatile Interface Ad. 2+ */ - /* NUBUS */ - MACHW_DECLARE(NUBUS); /* NUBUS */ -}; - -extern struct mac_hw_present mac_hw_present; - #endif /* __ASSEMBLY__ */ #endif /* linux/machw.h */ -- cgit v1.2.3 From 8ae121ac8666b0421aa20fd80d4597ec66fa54bc Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 23 Apr 2008 07:13:29 -0400 Subject: sched: fix RT task-wakeup logic Dmitry Adamushko pointed out a logic error in task_wake_up_rt() where we will always evaluate to "true". You can find the thread here: http://lkml.org/lkml/2008/4/22/296 In reality, we only want to try to push tasks away when a wake up request is not going to preempt the current task. So lets fix it. Note: We introduce test_tsk_need_resched() instead of open-coding the flag check so that the merge-conflict with -rt should help remind us that we may need to support NEEDS_RESCHED_DELAYED in the future, too. Signed-off-by: Gregory Haskins CC: Dmitry Adamushko CC: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ++++++- kernel/sched_rt.c | 7 +++++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 03c238088aee..698b5a4d25a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1977,6 +1977,11 @@ static inline void clear_tsk_need_resched(struct task_struct *tsk) clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } +static inline int test_tsk_need_resched(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); +} + static inline int signal_pending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); @@ -1991,7 +1996,7 @@ static inline int fatal_signal_pending(struct task_struct *p) static inline int need_resched(void) { - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); + return unlikely(test_tsk_need_resched(current)); } /* diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index dcd649588593..060e87b0cb1c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1098,11 +1098,14 @@ static void post_schedule_rt(struct rq *rq) } } - +/* + * If we are not running and we are not going to reschedule soon, we should + * try to push tasks away now + */ static void task_wake_up_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && - (p->prio >= rq->rt.highest_prio) && + !test_tsk_need_resched(rq->curr) && rq->rt.overloaded) push_rt_tasks(rq); } -- cgit v1.2.3 From 690229a0912ca2fef8b542fe4d8b73acfcdc6e24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 23 Apr 2008 09:31:35 +0200 Subject: sched: make clock sync tunable by architecture code make time_sync_thresh tunable to architecture code. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/sched.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 698b5a4d25a7..54c9ca26b7d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -158,6 +158,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) } #endif +extern unsigned long long time_sync_thresh; + /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). diff --git a/kernel/sched.c b/kernel/sched.c index 3ac3d7af04a1..8f433fedfcb3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -899,7 +899,7 @@ static inline u64 global_rt_runtime(void) return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; } -static const unsigned long long time_sync_thresh = 100000; +unsigned long long time_sync_thresh = 100000; static DEFINE_PER_CPU(unsigned long long, time_offset); static DEFINE_PER_CPU(unsigned long long, prev_cpu_time); -- cgit v1.2.3 From 3e51f33fcc7f55e6df25d15b55ed10c8b4da84cd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 3 May 2008 18:29:28 +0200 Subject: sched: add optional support for CONFIG_HAVE_UNSTABLE_SCHED_CLOCK this replaces the rq->clock stuff (and possibly cpu_clock()). - architectures that have an 'imperfect' hardware clock can set CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - the 'jiffie' window might be superfulous when we update tick_gtod before the __update_sched_clock() call in sched_clock_tick() - cpu_clock() might be implemented as: sched_clock_cpu(smp_processor_id()) if the accuracy proves good enough - how far can TSC drift in a single jiffie when considering the filtering and idle hooks? [ mingo@elte.hu: various fixes and cleanups ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 29 +++++++ init/main.c | 1 + kernel/Makefile | 2 +- kernel/sched.c | 165 +++-------------------------------- kernel/sched_clock.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched_debug.c | 7 -- kernel/sched_fair.c | 2 +- 7 files changed, 281 insertions(+), 161 deletions(-) create mode 100644 kernel/sched_clock.c (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 54c9ca26b7d8..0c35b0343a76 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1553,6 +1553,35 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) extern unsigned long long sched_clock(void); +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init(void) +{ +} + +static inline u64 sched_clock_cpu(int cpu) +{ + return sched_clock(); +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} +#else +extern void sched_clock_init(void); +extern u64 sched_clock_cpu(int cpu); +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); +#endif + /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): diff --git a/init/main.c b/init/main.c index a87d4ca5c36c..ddada7acf363 100644 --- a/init/main.c +++ b/init/main.c @@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); + sched_clock_init(); profile_init(); if (!irqs_disabled()) printk("start_kernel(): bug: interrupts were enabled early\n"); diff --git a/kernel/Makefile b/kernel/Makefile index 188c43223f52..1c9938addb9d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/sched.c b/kernel/sched.c index 9457106b18af..58fb8af15776 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -74,16 +74,6 @@ #include #include -/* - * Scheduler clock - returns current time in nanosec units. - * This is default implementation. - * Architectures and sub-architectures can override this. - */ -unsigned long long __attribute__((weak)) sched_clock(void) -{ - return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); -} - /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -557,13 +547,7 @@ struct rq { unsigned long next_balance; struct mm_struct *prev_mm; - u64 clock, prev_clock_raw; - s64 clock_max_delta; - - unsigned int clock_warps, clock_overflows, clock_underflows; - u64 idle_clock; - unsigned int clock_deep_idle_events; - u64 tick_timestamp; + u64 clock; atomic_t nr_iowait; @@ -628,82 +612,6 @@ static inline int cpu_of(struct rq *rq) #endif } -#ifdef CONFIG_NO_HZ -static inline bool nohz_on(int cpu) -{ - return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE; -} - -static inline u64 max_skipped_ticks(struct rq *rq) -{ - return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1; -} - -static inline void update_last_tick_seen(struct rq *rq) -{ - rq->last_tick_seen = jiffies; -} -#else -static inline u64 max_skipped_ticks(struct rq *rq) -{ - return 1; -} - -static inline void update_last_tick_seen(struct rq *rq) -{ -} -#endif - -/* - * Update the per-runqueue clock, as finegrained as the platform can give - * us, but without assuming monotonicity, etc.: - */ -static void __update_rq_clock(struct rq *rq) -{ - u64 prev_raw = rq->prev_clock_raw; - u64 now = sched_clock(); - s64 delta = now - prev_raw; - u64 clock = rq->clock; - -#ifdef CONFIG_SCHED_DEBUG - WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -#endif - /* - * Protect against sched_clock() occasionally going backwards: - */ - if (unlikely(delta < 0)) { - clock++; - rq->clock_warps++; - } else { - /* - * Catch too large forward jumps too: - */ - u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC; - u64 max_time = rq->tick_timestamp + max_jump; - - if (unlikely(clock + delta > max_time)) { - if (clock < max_time) - clock = max_time; - else - clock++; - rq->clock_overflows++; - } else { - if (unlikely(delta > rq->clock_max_delta)) - rq->clock_max_delta = delta; - clock += delta; - } - } - - rq->prev_clock_raw = now; - rq->clock = clock; -} - -static void update_rq_clock(struct rq *rq) -{ - if (likely(smp_processor_id() == cpu_of(rq))) - __update_rq_clock(rq); -} - /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. @@ -719,6 +627,11 @@ static void update_rq_clock(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) +static inline void update_rq_clock(struct rq *rq) +{ + rq->clock = sched_clock_cpu(cpu_of(rq)); +} + /* * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ @@ -935,7 +848,6 @@ static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu) static unsigned long long __cpu_clock(int cpu) { unsigned long long now; - struct rq *rq; /* * Only call sched_clock() if the scheduler has already been @@ -944,9 +856,7 @@ static unsigned long long __cpu_clock(int cpu) if (unlikely(!scheduler_running)) return 0; - rq = cpu_rq(cpu); - update_rq_clock(rq); - now = rq->clock; + now = sched_clock_cpu(cpu); return now; } @@ -1120,45 +1030,6 @@ static struct rq *this_rq_lock(void) return rq; } -/* - * We are going deep-idle (irqs are disabled): - */ -void sched_clock_idle_sleep_event(void) -{ - struct rq *rq = cpu_rq(smp_processor_id()); - - WARN_ON(!irqs_disabled()); - spin_lock(&rq->lock); - __update_rq_clock(rq); - spin_unlock(&rq->lock); - rq->clock_deep_idle_events++; -} -EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); - -/* - * We just idled delta nanoseconds (called with irqs disabled): - */ -void sched_clock_idle_wakeup_event(u64 delta_ns) -{ - struct rq *rq = cpu_rq(smp_processor_id()); - u64 now = sched_clock(); - - WARN_ON(!irqs_disabled()); - rq->idle_clock += delta_ns; - /* - * Override the previous timestamp and ignore all - * sched_clock() deltas that occured while we idled, - * and use the PM-provided delta_ns to advance the - * rq clock: - */ - spin_lock(&rq->lock); - rq->prev_clock_raw = now; - rq->clock += delta_ns; - spin_unlock(&rq->lock); - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); - static void __resched_task(struct task_struct *p, int tif_bit); static inline void resched_task(struct task_struct *p) @@ -1283,7 +1154,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); spin_lock(&rq->lock); - __update_rq_clock(rq); + update_rq_clock(rq); rq->curr->sched_class->task_tick(rq, rq->curr, 1); spin_unlock(&rq->lock); @@ -4476,19 +4347,11 @@ void scheduler_tick(void) int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; - u64 next_tick = rq->tick_timestamp + TICK_NSEC; + + sched_clock_tick(); spin_lock(&rq->lock); - __update_rq_clock(rq); - /* - * Let rq->clock advance by at least TICK_NSEC: - */ - if (unlikely(rq->clock < next_tick)) { - rq->clock = next_tick; - rq->clock_underflows++; - } - rq->tick_timestamp = rq->clock; - update_last_tick_seen(rq); + update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); spin_unlock(&rq->lock); @@ -4642,7 +4505,7 @@ need_resched_nonpreemptible: * Do the rq-clock update outside the rq lock: */ local_irq_disable(); - __update_rq_clock(rq); + update_rq_clock(rq); spin_lock(&rq->lock); clear_tsk_need_resched(prev); @@ -8226,8 +8089,6 @@ void __init sched_init(void) spin_lock_init(&rq->lock); lockdep_set_class(&rq->lock, &rq->rq_lock_key); rq->nr_running = 0; - rq->clock = 1; - update_last_tick_seen(rq); init_cfs_rq(&rq->cfs, rq); init_rt_rq(&rq->rt, rq); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -8371,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep); static void normalize_task(struct rq *rq, struct task_struct *p) { int on_rq; + update_rq_clock(rq); on_rq = p->se.on_rq; if (on_rq) @@ -8402,7 +8264,6 @@ void normalize_rt_tasks(void) p->se.sleep_start = 0; p->se.block_start = 0; #endif - task_rq(p)->clock = 0; if (!rt_task(p)) { /* diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c new file mode 100644 index 000000000000..9c597e37f7de --- /dev/null +++ b/kernel/sched_clock.c @@ -0,0 +1,236 @@ +/* + * sched_clock for unstable cpu clocks + * + * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra + * + * Based on code by: + * Ingo Molnar + * Guillaume Chazarain + * + * Create a semi stable clock from a mixture of other events, including: + * - gtod + * - jiffies + * - sched_clock() + * - explicit idle events + * + * We use gtod as base and the unstable clock deltas. The deltas are filtered, + * making it monotonic and keeping it within an expected window. This window + * is set up using jiffies. + * + * Furthermore, explicit sleep and wakeup hooks allow us to account for time + * that is otherwise invisible (TSC gets stopped). + * + * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat + * consistent between cpus (never more than 1 jiffies difference). + */ +#include +#include +#include +#include +#include + + +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + +struct sched_clock_data { + /* + * Raw spinlock - this is a special case: this might be called + * from within instrumentation code so we dont want to do any + * instrumentation ourselves. + */ + raw_spinlock_t lock; + + unsigned long prev_jiffies; + u64 prev_raw; + u64 tick_raw; + u64 tick_gtod; + u64 clock; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); + +static inline struct sched_clock_data *this_scd(void) +{ + return &__get_cpu_var(sched_clock_data); +} + +static inline struct sched_clock_data *cpu_sdc(int cpu) +{ + return &per_cpu(sched_clock_data, cpu); +} + +void sched_clock_init(void) +{ + u64 ktime_now = ktime_to_ns(ktime_get()); + u64 now = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct sched_clock_data *scd = cpu_sdc(cpu); + + scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + scd->prev_jiffies = jiffies; + scd->prev_raw = now; + scd->tick_raw = now; + scd->tick_gtod = ktime_now; + scd->clock = ktime_now; + } +} + +/* + * update the percpu scd from the raw @now value + * + * - filter out backward motion + * - use jiffies to generate a min,max window to clip the raw values + */ +static void __update_sched_clock(struct sched_clock_data *scd, u64 now) +{ + unsigned long now_jiffies = jiffies; + long delta_jiffies = now_jiffies - scd->prev_jiffies; + u64 clock = scd->clock; + u64 min_clock, max_clock; + s64 delta = now - scd->prev_raw; + + WARN_ON_ONCE(!irqs_disabled()); + min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC; + + if (unlikely(delta < 0)) { + clock++; + goto out; + } + + max_clock = min_clock + TICK_NSEC; + + if (unlikely(clock + delta > max_clock)) { + if (clock < max_clock) + clock = max_clock; + else + clock++; + } else { + clock += delta; + } + + out: + if (unlikely(clock < min_clock)) + clock = min_clock; + + scd->prev_raw = now; + scd->prev_jiffies = now_jiffies; + scd->clock = clock; +} + +static void lock_double_clock(struct sched_clock_data *data1, + struct sched_clock_data *data2) +{ + if (data1 < data2) { + __raw_spin_lock(&data1->lock); + __raw_spin_lock(&data2->lock); + } else { + __raw_spin_lock(&data2->lock); + __raw_spin_lock(&data1->lock); + } +} + +u64 sched_clock_cpu(int cpu) +{ + struct sched_clock_data *scd = cpu_sdc(cpu); + u64 now, clock; + + WARN_ON_ONCE(!irqs_disabled()); + now = sched_clock(); + + if (cpu != raw_smp_processor_id()) { + /* + * in order to update a remote cpu's clock based on our + * unstable raw time rebase it against: + * tick_raw (offset between raw counters) + * tick_gotd (tick offset between cpus) + */ + struct sched_clock_data *my_scd = this_scd(); + + lock_double_clock(scd, my_scd); + + now -= my_scd->tick_raw; + now += scd->tick_raw; + + now -= my_scd->tick_gtod; + now += scd->tick_gtod; + + __raw_spin_unlock(&my_scd->lock); + } else { + __raw_spin_lock(&scd->lock); + } + + __update_sched_clock(scd, now); + clock = scd->clock; + + __raw_spin_unlock(&scd->lock); + + return clock; +} + +void sched_clock_tick(void) +{ + struct sched_clock_data *scd = this_scd(); + u64 now, now_gtod; + + WARN_ON_ONCE(!irqs_disabled()); + + now = sched_clock(); + now_gtod = ktime_to_ns(ktime_get()); + + __raw_spin_lock(&scd->lock); + __update_sched_clock(scd, now); + /* + * update tick_gtod after __update_sched_clock() because that will + * already observe 1 new jiffy; adding a new tick_gtod to that would + * increase the clock 2 jiffies. + */ + scd->tick_raw = now; + scd->tick_gtod = now_gtod; + __raw_spin_unlock(&scd->lock); +} + +/* + * We are going deep-idle (irqs are disabled): + */ +void sched_clock_idle_sleep_event(void) +{ + sched_clock_cpu(smp_processor_id()); +} +EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); + +/* + * We just idled delta nanoseconds (called with irqs disabled): + */ +void sched_clock_idle_wakeup_event(u64 delta_ns) +{ + struct sched_clock_data *scd = this_scd(); + u64 now = sched_clock(); + + /* + * Override the previous timestamp and ignore all + * sched_clock() deltas that occured while we idled, + * and use the PM-provided delta_ns to advance the + * rq clock: + */ + __raw_spin_lock(&scd->lock); + scd->prev_raw = now; + scd->clock += delta_ns; + __raw_spin_unlock(&scd->lock); + + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); + +#endif + +/* + * Scheduler clock - returns current time in nanosec units. + * This is default implementation. + * Architectures and sub-architectures can override this. + */ +unsigned long long __attribute__((weak)) sched_clock(void) +{ + return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); +} diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6b4a12558e88..5f06118fbc31 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu) PN(next_balance); P(curr->pid); PN(clock); - PN(idle_clock); - PN(prev_clock_raw); - P(clock_warps); - P(clock_overflows); - P(clock_underflows); - P(clock_deep_idle_events); - PN(clock_max_delta); P(cpu_load[0]); P(cpu_load[1]); P(cpu_load[2]); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index d99e01f6929a..c863663d204d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -959,7 +959,7 @@ static void yield_task_fair(struct rq *rq) return; if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { - __update_rq_clock(rq); + update_rq_clock(rq); /* * Update run-time statistics of the 'current'. */ -- cgit v1.2.3 From 78ab88f04f44bed566d51dce0c7cbfeff6449a06 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 1 May 2008 23:41:41 +0900 Subject: libata: improve post-reset device ready test Some controllers (jmb and inic162x) use 0x77 and 0x7f to indicate that the device isn't ready yet. It looks like they use 0xff if device presence is detected but connection isn't established. 0x77 or 0x7f after connection is established and use the value from signature FIS after receiving it. This patch implements ata_check_ready(), which takes TF status value and determines whether the port is ready or not considering the above and other conditions, and use it in @check_ready() functions. This is safe as both 0x77 and 0x7f aren't valid ready status value even though they have BSY bit cleared. This fixes hot plug detection failures which can be triggered with certain drives if they aren't already spun up when the data connector is hot plugged. Tested on sil, sil24, ahci (jmb/ich), piix and inic162x combined with eight drives from all major vendors. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 4 +--- drivers/ata/libata-sff.c | 6 +----- include/linux/libata.h | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 8cace9aa9c03..97f83fb2ee2e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1267,9 +1267,7 @@ static int ahci_check_ready(struct ata_link *link) void __iomem *port_mmio = ahci_port_base(link->ap); u8 status = readl(port_mmio + PORT_TFDATA) & 0xFF; - if (!(status & ATA_BUSY)) - return 1; - return 0; + return ata_check_ready(status); } static int ahci_softreset(struct ata_link *link, unsigned int *class, diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 2ec65a8fda79..3c2d2289f85e 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -314,11 +314,7 @@ static int ata_sff_check_ready(struct ata_link *link) { u8 status = link->ap->ops->sff_check_status(link->ap); - if (!(status & ATA_BUSY)) - return 1; - if (status == 0xff) - return -ENODEV; - return 0; + return ata_check_ready(status); } /** diff --git a/include/linux/libata.h b/include/linux/libata.h index d1dfe872ee30..95e6159b44cf 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1381,6 +1381,21 @@ static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host) return *(struct ata_port **)&host->hostdata[0]; } +static inline int ata_check_ready(u8 status) +{ + /* Some controllers report 0x77 or 0x7f during intermediate + * not-ready stages. + */ + if (status == 0x77 || status == 0x7f) + return 0; + + /* 0xff indicates either no device or device not ready */ + if (status == 0xff) + return -ENODEV; + + return !(status & ATA_BUSY); +} + /************************************************************************** * PMP - drivers/ata/libata-pmp.c -- cgit v1.2.3 From 10acf3b0d3b46c6ef5d6f0722f72ad9b743ea848 Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Fri, 2 May 2008 02:14:53 -0400 Subject: libata: export ata_eh_analyze_ncq_error Export ata_eh_analyze_ncq_error() for subsequent use by sata_mv, as suggested by Tejun. Signed-off-by: Mark Lord Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 1 + drivers/ata/libata-eh.c | 2 +- include/linux/libata.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3bc488538204..927b692d723c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6292,6 +6292,7 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port); EXPORT_SYMBOL_GPL(ata_eh_thaw_port); EXPORT_SYMBOL_GPL(ata_eh_qc_complete); EXPORT_SYMBOL_GPL(ata_eh_qc_retry); +EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error); EXPORT_SYMBOL_GPL(ata_do_eh); EXPORT_SYMBOL_GPL(ata_std_error_handler); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 61dcd0026c64..62e033146bed 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1357,7 +1357,7 @@ static void ata_eh_analyze_serror(struct ata_link *link) * LOCKING: * Kernel thread context (may sleep). */ -static void ata_eh_analyze_ncq_error(struct ata_link *link) +void ata_eh_analyze_ncq_error(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; diff --git a/include/linux/libata.h b/include/linux/libata.h index 95e6159b44cf..7e206da1fbfb 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1039,6 +1039,7 @@ extern void ata_eh_thaw_port(struct ata_port *ap); extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); +extern void ata_eh_analyze_ncq_error(struct ata_link *link); extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, -- cgit v1.2.3 From 9b9a8bfc8dfbe09dc57f274e32e8b06151abbad7 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Fri, 2 May 2008 13:00:51 -0500 Subject: phylib: Fix some sparse warnings Declared some things static, declared some things in the header. Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- drivers/net/phy/phy.c | 2 +- include/linux/phy.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 3c18bb594957..45cc2914d347 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -547,7 +547,7 @@ static void phy_force_reduction(struct phy_device *phydev) * Must not be called from interrupt context, or while the * phydev->lock is held. */ -void phy_error(struct phy_device *phydev) +static void phy_error(struct phy_device *phydev) { mutex_lock(&phydev->lock); phydev->state = PHY_HALTED; diff --git a/include/linux/phy.h b/include/linux/phy.h index 02df20f085fe..7224c4099a28 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -412,6 +412,8 @@ int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); void phy_sanitize_settings(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); +int phy_enable_interrupts(struct phy_device *phydev); +int phy_disable_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); @@ -447,5 +449,8 @@ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); int phy_scan_fixups(struct phy_device *phydev); +int __init mdio_bus_init(void); +void mdio_bus_exit(void); + extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ -- cgit v1.2.3 From 33dcdac2df54e66c447ae03f58c95c7251aa5649 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Apr 2008 17:46:26 +0200 Subject: [PATCH] kill ->put_inode And with that last patch to affs killing the last put_inode instance we can finally, after many years of transition kill this racy and awkward interface. (It's kinda funny that even the description in Documentation/filesystems/vfs.txt was entirely wrong..) Also remove a very misleading comment above the defintion of struct super_operations. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 -- Documentation/filesystems/vfs.txt | 4 ---- fs/inode.c | 3 --- include/linux/fs.h | 5 ----- 4 files changed, 14 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index c2992bc54f2f..8b22d7d8b991 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -92,7 +92,6 @@ prototypes: void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); - void (*put_inode) (struct inode *); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); @@ -115,7 +114,6 @@ alloc_inode: no no no destroy_inode: no dirty_inode: no (must not sleep) write_inode: no -put_inode: no drop_inode: no !!!inode_lock!!! delete_inode: no put_super: yes yes no diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 81e5be6e6e35..b7522c6cbae3 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -205,7 +205,6 @@ struct super_operations { void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); - void (*put_inode) (struct inode *); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); @@ -246,9 +245,6 @@ or bottom half). inode to disc. The second parameter indicates whether the write should be synchronous or not, not all filesystems check this flag. - put_inode: called when the VFS inode is removed from the inode - cache. - drop_inode: called when the last access to the inode is dropped, with the inode_lock spinlock held. diff --git a/fs/inode.c b/fs/inode.c index bf6478130424..18bdce14b70c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1153,9 +1153,6 @@ void iput(struct inode *inode) BUG_ON(inode->i_state == I_CLEAR); - if (op && op->put_inode) - op->put_inode(inode); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) iput_final(inode); } diff --git a/include/linux/fs.h b/include/linux/fs.h index a1ba005d08e7..7e0fa9e64479 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1289,17 +1289,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); -/* - * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called - * without the big kernel lock held in all filesystems. - */ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); - void (*put_inode) (struct inode *); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); -- cgit v1.2.3 From aeed5fce37196e09b4dac3a1c00d8b7122e040ce Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 May 2008 20:49:23 +0100 Subject: x86: fix PAE pmd_bad bootup warning Fix warning from pmd_bad() at bootup on a HIGHMEM64G HIGHPTE x86_32. That came from 9fc34113f6880b215cbea4e7017fc818700384c2 x86: debug pmd_bad(); but we understand now that the typecasting was wrong for PAE in the previous version: pagetable pages above 4GB looked bad and stopped Arjan from booting. And revert that cded932b75ab0a5f9181ee3da34a0a488d1a14fd x86: fix pmd_bad and pud_bad to support huge pages. It was the wrong way round: we shouldn't weaken every pmd_bad and pud_bad check to let huge pages slip through - in part they check that we _don't_ have a huge page where it's not expected. Put the x86 pmd_bad() and pud_bad() definitions back to what they have long been: they can be improved (x86_32 should use PTE_MASK, to stop PAE thinking junk in the upper word is good; and x86_64 should follow x86_32's stricter comparison, to stop thinking any subset of required bits is good); but that should be a later patch. Fix Hans' good observation that follow_page() will never find pmd_huge() because that would have already failed the pmd_bad test: test pmd_huge in between the pmd_none and pmd_bad tests. Tighten x86's pmd_huge() check? No, once it's a hugepage entry, it can get quite far from a good pmd: for example, PROT_NONE leaves it with only ACCESSED of the KERN_PGTABLE bits. However... though follow_page() contains this and another test for huge pages, so it's nice to keep it working on them, where does it actually get called on a huge page? get_user_pages() checks is_vm_hugetlb_page(vma) to to call alternative hugetlb processing, as does unmap_vmas() and others. Signed-off-by: Hugh Dickins Earlier-version-tested-by: Ingo Molnar Cc: Thomas Gleixner Cc: Jeff Chua Cc: Hans Rosenfeld Cc: Arjan van de Ven Signed-off-by: Linus Torvalds --- arch/x86/mm/pgtable_32.c | 7 ------- include/asm-x86/pgtable_32.h | 9 +-------- include/asm-x86/pgtable_64.h | 6 ++---- mm/memory.c | 5 ++++- 4 files changed, 7 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9ee007be9142..369cf065b6a4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve) __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; } - -int pmd_bad(pmd_t pmd) -{ - WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); - - return pmd_bad_v1(pmd); -} diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 577ab79c4c27..d7f0403bbecb 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -88,14 +88,7 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val((x))) #define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) - -extern int pmd_bad(pmd_t pmd); - -#define pmd_bad_v1(x) \ - (_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER))) -#define pmd_bad_v2(x) \ - (_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER | \ - _PAGE_PSE | _PAGE_NX))) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index a3bbf8766c1d..efe83dcbd412 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -158,14 +158,12 @@ static inline unsigned long pgd_bad(pgd_t pgd) static inline unsigned long pud_bad(pud_t pud) { - return pud_val(pud) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } static inline unsigned long pmd_bad(pmd_t pmd) { - return pmd_val(pmd) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } #define pte_none(x) (!pte_val((x))) diff --git a/mm/memory.c b/mm/memory.c index bbab1e37055e..48c122d42ed7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -969,7 +969,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto no_page_table; pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + if (pmd_none(*pmd)) goto no_page_table; if (pmd_huge(*pmd)) { @@ -978,6 +978,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto out; } + if (unlikely(pmd_bad(*pmd))) + goto no_page_table; + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) goto out; -- cgit v1.2.3 From 0eaeafa10f3b2bd027e95859a6785d4c7fcc174c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 7 May 2008 09:22:53 +0200 Subject: [S390] s390-kvm: leave sie context on work. Removes preemption requirement From: Martin Schwidefsky This patch fixes a bug with cpu bound guest on kvm-s390. Sometimes it was impossible to deliver a signal to a spinning guest. We used preemption as a circumvention. The preemption notifiers called vcpu_load, which checked for pending signals and triggered a host intercept. But even with preemption, a sigkill was not delivered immediately. This patch changes the low level host interrupt handler to check for the SIE instruction, if TIF_WORK is set. In that case we change the instruction pointer of the return PSW to rerun the vcpu_run loop. The kvm code sees an intercept reason 0 if that happens. This patch adds accounting for these types of intercept as well. The advantages: - works with and without preemption - signals are delivered immediately - much better host latencies without preemption Acked-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 30 +++++++++++++++++++++++++++++- arch/s390/kvm/Kconfig | 1 - arch/s390/kvm/intercept.c | 3 +++ arch/s390/kvm/kvm-s390.c | 5 +---- include/asm-s390/kvm_host.h | 1 + 5 files changed, 34 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index a57909d63149..fee10177dbfc 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -607,14 +607,37 @@ io_restore_trace_psw: #endif # -# switch to kernel stack, then check TIF bits +# There is work todo, we need to check if we return to userspace, then +# check, if we are in SIE, if yes leave it # io_work: tm SP_PSW+1(%r15),0x01 # returning to user ? #ifndef CONFIG_PREEMPT +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + jnz io_work_user # yes -> no need to check for SIE + la %r1, BASED(sie_opcode) # we return to kernel here + lg %r2, SP_PSW+8(%r15) + clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + jne io_restore # no-> return to kernel + lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE + aghi %r1, 4 + stg %r1, SP_PSW+8(%r15) + j io_restore # return to kernel +#else jno io_restore # no-> skip resched & signal +#endif #else jnz io_work_user # yes -> do resched & signal +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + la %r1, BASED(sie_opcode) + lg %r2, SP_PSW+8(%r15) + clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + jne 0f # no -> leave PSW alone + lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE + aghi %r1, 4 + stg %r1, SP_PSW+8(%r15) +0: +#endif # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) jnz io_restore # preemption is disabled @@ -652,6 +675,11 @@ io_work_loop: j io_restore io_work_done: +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +sie_opcode: + .long 0xb2140000 +#endif + # # _TIF_MCCK_PENDING is set, call handler # diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 1761b74d639b..e051cad1f1e0 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,7 +22,6 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select S390_SWITCH_AMODE - select PREEMPT ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 349581a26103..47a0b642174c 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -105,6 +105,9 @@ static intercept_handler_t instruction_handlers[256] = { static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { + case 0x0: + vcpu->stat.exit_null++; + break; case 0x10: vcpu->stat.exit_external_request++; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98d1e73e01f1..0ac36a649eba 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,6 +31,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_null", VCPU_STAT(exit_null) }, { "exit_validity", VCPU_STAT(exit_validity) }, { "exit_stop_request", VCPU_STAT(exit_stop_request) }, { "exit_external_request", VCPU_STAT(exit_external_request) }, @@ -221,10 +222,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); - - if (signal_pending(current)) - atomic_set_mask(CPUSTAT_STOP_INT, - &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h index f8204a4f2e02..18cbd8a39796 100644 --- a/include/asm-s390/kvm_host.h +++ b/include/asm-s390/kvm_host.h @@ -104,6 +104,7 @@ struct sie_block { struct kvm_vcpu_stat { u32 exit_userspace; + u32 exit_null; u32 exit_external_request; u32 exit_external_interrupt; u32 exit_stop_request; -- cgit v1.2.3 From b499d76bfd78e900039155247e1c21bfdf807b7b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 7 May 2008 09:22:57 +0200 Subject: [S390] compat ptrace cleanup This removes redundant arch code for generic ptrace requests already handled by ptrace_request and compat_ptrace_request. It simplifies things to just have the standard entry points, and use the generic compat_sys_ptrace. Signed-off-by: Roland McGrath Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_wrapper.S | 2 +- arch/s390/kernel/ptrace.c | 100 +++----------------------------------- include/asm-s390/ptrace.h | 2 + 3 files changed, 9 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 743d54f0b8db..d003a6e16afb 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -121,7 +121,7 @@ sys32_ptrace_wrapper: lgfr %r3,%r3 # long llgtr %r4,%r4 # long llgfr %r5,%r5 # long - jg sys_ptrace # branch to system call + jg compat_sys_ptrace # branch to system call .globl sys32_alarm_wrapper sys32_alarm_wrapper: diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 7f4270163744..35827b9bd4d1 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -292,8 +292,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) return 0; } -static int -do_ptrace_normal(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { ptrace_area parea; int copied, ret; @@ -529,35 +528,19 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) return 0; } -static int -do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned int tmp; /* 4 bytes !! */ + unsigned long addr = caddr; + unsigned long data = cdata; ptrace_area_emu31 parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* read word at location addr. */ - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - if (copied != sizeof(tmp)) - return -EIO; - return put_user(tmp, (unsigned int __force __user *) data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ return peek_user_emu31(child, addr, data); - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* write the word at location addr. */ - tmp = data; - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1); - if (copied != sizeof(tmp)) - return -EIO; - return 0; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ return poke_user_emu31(child, addr, data); @@ -587,82 +570,11 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned int); } return 0; - case PTRACE_GETEVENTMSG: - return put_user((__u32) child->ptrace_message, - (unsigned int __force __user *) data); - case PTRACE_GETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_to_user32((compat_siginfo_t - __force __user *) data, - child->last_siginfo); - case PTRACE_SETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_from_user32(child->last_siginfo, - (compat_siginfo_t - __force __user *) data); } - return ptrace_request(child, request, addr, data); + return compat_ptrace_request(child, request, addr, data); } #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - switch (request) { - case PTRACE_SYSCALL: - /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - /* restart after signal. */ - if (!valid_signal(data)) - return -EIO; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; - - case PTRACE_KILL: - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - return 0; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; - - case PTRACE_SINGLESTEP: - /* set the trap flag. */ - if (!valid_signal(data)) - return -EIO; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - user_enable_single_step(child); - /* give it a chance to run. */ - wake_up_process(child); - return 0; - - /* Do requests that differ for 31/64 bit */ - default: -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - return do_ptrace_emu31(child, request, addr, data); -#endif - return do_ptrace_normal(child, request, addr, data); - } - /* Not reached. */ - return -EIO; -} - asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h index 441d7c260857..d7d4e2eb3e6f 100644 --- a/include/asm-s390/ptrace.h +++ b/include/asm-s390/ptrace.h @@ -471,6 +471,8 @@ struct task_struct; extern void user_enable_single_step(struct task_struct *); extern void user_disable_single_step(struct task_struct *); +#define __ARCH_WANT_COMPAT_SYS_PTRACE + #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) #define regs_return_value(regs)((regs)->gprs[2]) -- cgit v1.2.3 From 45e576b1c3d0020607b8666c0247164e92c7d719 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 May 2008 09:22:59 +0200 Subject: [S390] guest page hinting light Use the existing arch_alloc_page/arch_free_page callbacks to do the guest page state transitions between stable and unused. Acked-by: Rik van Riel Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 7 ++++ arch/s390/mm/Makefile | 1 + arch/s390/mm/init.c | 3 ++ arch/s390/mm/page-states.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-s390/page.h | 11 +++++++ include/asm-s390/system.h | 6 ++++ 6 files changed, 107 insertions(+) create mode 100644 arch/s390/mm/page-states.c (limited to 'include') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 29a7940f284f..1d035082e78e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -430,6 +430,13 @@ config CMM_IUCV Select this option to enable the special message interface to the cooperative memory management. +config PAGE_STATES + bool "Unused page notification" + help + This enables the notification of unused pages to the + hypervisor. The ESSA instruction is used to do the states + changes between a page that has content and the unused state. + config VIRT_TIMER bool "Virtual CPU timer support" help diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index fb988a48a754..2a7458134544 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -5,3 +5,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PAGE_STATES) += page-states.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index fa31de6ae97a..29f3a63806b9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -126,6 +126,9 @@ void __init mem_init(void) /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); + /* Setup guest page hinting */ + cmma_init(); + /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c new file mode 100644 index 000000000000..fc0ad73ffd90 --- /dev/null +++ b/arch/s390/mm/page-states.c @@ -0,0 +1,79 @@ +/* + * arch/s390/mm/page-states.c + * + * Copyright IBM Corp. 2008 + * + * Guest page hinting for unused pages. + * + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include + +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 + +static int cmma_flag; + +static int __init cmma(char *str) +{ + char *parm; + parm = strstrip(str); + if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { + cmma_flag = 1; + return 1; + } + cmma_flag = 0; + if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0) + return 1; + return 0; +} + +__setup("cmma=", cmma); + +void __init cmma_init(void) +{ + register unsigned long tmp asm("0") = 0; + register int rc asm("1") = -EOPNOTSUPP; + + if (!cmma_flag) + return; + asm volatile( + " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+&d" (rc), "+&d" (tmp)); + if (rc) + cmma_flag = 0; +} + +void arch_free_page(struct page *page, int order) +{ + int i, rc; + + if (!cmma_flag) + return; + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT), + "i" (ESSA_SET_UNUSED)); +} + +void arch_alloc_page(struct page *page, int order) +{ + int i, rc; + + if (!cmma_flag) + return; + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT), + "i" (ESSA_SET_STABLE)); +} diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index f0f4579eac13..12fd9c4f0f15 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -125,6 +125,17 @@ page_get_storage_key(unsigned long addr) return skey; } +#ifdef CONFIG_PAGE_STATES + +struct page; +void arch_free_page(struct page *page, int order); +void arch_alloc_page(struct page *page, int order); + +#define HAVE_ARCH_FREE_PAGE +#define HAVE_ARCH_ALLOC_PAGE + +#endif + #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index c819ae25a842..e0d4500d5f95 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -116,6 +116,12 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +#ifdef CONFIG_PAGE_STATES +extern void cmma_init(void); +#else +static inline void cmma_init(void) { } +#endif + #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ account_vtime(prev); \ -- cgit v1.2.3 From 7f3d4ee108c184ab215036051087aaaaa8de7661 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 7 May 2008 09:22:39 +0200 Subject: vfs: splice remove_suid() cleanup generic_file_splice_write() duplicates remove_suid() just because it doesn't hold i_mutex. But it grabs i_mutex inside splice_from_pipe() anyway, so this is rather pointless. Move locking to generic_file_splice_write() and call remove_suid() and __splice_from_pipe() instead. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/splice.c | 29 +++++++++++++---------------- include/linux/fs.h | 1 - mm/filemap.c | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/splice.c b/fs/splice.c index 633f58ebfb72..cece15b4ef72 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; - int killsuid, killpriv; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; ssize_t ret; - int err = 0; - - killpriv = security_inode_need_killpriv(out->f_path.dentry); - killsuid = should_remove_suid(out->f_path.dentry); - if (unlikely(killsuid || killpriv)) { - mutex_lock(&inode->i_mutex); - if (killpriv) - err = security_inode_killpriv(out->f_path.dentry); - if (!err && killsuid) - err = __remove_suid(out->f_path.dentry, killsuid); - mutex_unlock(&inode->i_mutex); - if (err) - return err; - } - ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + inode_double_lock(inode, pipe->inode); + ret = remove_suid(out->f_path.dentry); + if (likely(!ret)) + ret = __splice_from_pipe(pipe, &sd, pipe_to_file); + inode_double_unlock(inode, pipe->inode); if (ret > 0) { unsigned long nr_pages; @@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e0fa9e64479..f413085f748e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1816,7 +1816,6 @@ extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); -extern int __remove_suid(struct dentry *, int); extern int should_remove_suid(struct dentry *); extern int remove_suid(struct dentry *); diff --git a/mm/filemap.c b/mm/filemap.c index 239d36163bbe..2dead9adf8b7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1655,7 +1655,7 @@ int should_remove_suid(struct dentry *dentry) } EXPORT_SYMBOL(should_remove_suid); -int __remove_suid(struct dentry *dentry, int kill) +static int __remove_suid(struct dentry *dentry, int kill) { struct iattr newattrs; -- cgit v1.2.3 From 221e583a735fc5d879d83c2a76b8ee5afcbdf146 Mon Sep 17 00:00:00 2001 From: Rasmus Rohde Date: Wed, 30 Apr 2008 17:22:06 +0200 Subject: udf: Make udf exportable Cc: Christoph Hellwig Signed-off-by: Rasmus Rohde Signed-off-by: Jan Kara --- fs/udf/namei.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++- fs/udf/super.c | 1 + fs/udf/udfdecl.h | 1 + include/linux/exportfs.h | 21 +++++++ 4 files changed, 161 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/udf/namei.c b/fs/udf/namei.c index ba5537d4bc15..47a6589e10b5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include static inline int udf_match(int len1, const char *name1, int len2, const char *name2) @@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, sector_t offset; struct extent_position epos = {}; struct udf_inode_info *dinfo = UDF_I(dir); + int isdotdot = dentry->d_name.len == 2 && + dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; size = udf_ext0_offset(dir) + dir->i_size; f_pos = udf_ext0_offset(dir); @@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, continue; } + if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) && + isdotdot) { + brelse(epos.bh); + return fi; + } + if (!lfi) continue; @@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, } } unlock_kernel(); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static struct fileIdentDesc *udf_add_entry(struct inode *dir, @@ -1232,6 +1240,134 @@ end_rename: return retval; } +static struct dentry *udf_get_parent(struct dentry *child) +{ + struct dentry *parent; + struct inode *inode = NULL; + struct dentry dotdot; + struct fileIdentDesc cfi; + struct udf_fileident_bh fibh; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + + lock_kernel(); + if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) + goto out_unlock; + + if (fibh.sbh != fibh.ebh) + brelse(fibh.ebh); + brelse(fibh.sbh); + + inode = udf_iget(child->d_inode->i_sb, + lelb_to_cpu(cfi.icb.extLocation)); + if (!inode) + goto out_unlock; + unlock_kernel(); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + + return parent; +out_unlock: + unlock_kernel(); + return ERR_PTR(-EACCES); +} + + +static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, + u16 partref, __u32 generation) +{ + struct inode *inode; + struct dentry *result; + kernel_lb_addr loc; + + if (block == 0) + return ERR_PTR(-ESTALE); + + loc.logicalBlockNum = block; + loc.partitionReferenceNum = partref; + inode = udf_iget(sb, loc); + + if (inode == NULL) + return ERR_PTR(-ENOMEM); + + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +static struct dentry *udf_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if ((fh_len != 3 && fh_len != 5) || + (fh_type != FILEID_UDF_WITH_PARENT && + fh_type != FILEID_UDF_WITHOUT_PARENT)) + return NULL; + + return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref, + fid->udf.generation); +} + +static struct dentry *udf_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT) + return NULL; + + return udf_nfs_get_inode(sb, fid->udf.parent_block, + fid->udf.parent_partref, + fid->udf.parent_generation); +} +static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, + int connectable) +{ + int len = *lenp; + struct inode *inode = de->d_inode; + kernel_lb_addr location = UDF_I(inode)->i_location; + struct fid *fid = (struct fid *)fh; + int type = FILEID_UDF_WITHOUT_PARENT; + + if (len < 3 || (connectable && len < 5)) + return 255; + + *lenp = 3; + fid->udf.block = location.logicalBlockNum; + fid->udf.partref = location.partitionReferenceNum; + fid->udf.generation = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + spin_lock(&de->d_lock); + inode = de->d_parent->d_inode; + location = UDF_I(inode)->i_location; + fid->udf.parent_block = location.logicalBlockNum; + fid->udf.parent_partref = location.partitionReferenceNum; + fid->udf.parent_generation = inode->i_generation; + spin_unlock(&de->d_lock); + *lenp = 5; + type = FILEID_UDF_WITH_PARENT; + } + + return type; +} + +const struct export_operations udf_export_ops = { + .encode_fh = udf_encode_fh, + .fh_to_dentry = udf_fh_to_dentry, + .fh_to_parent = udf_fh_to_parent, + .get_parent = udf_get_parent, +}; + const struct inode_operations udf_dir_inode_operations = { .lookup = udf_lookup, .create = udf_create, diff --git a/fs/udf/super.c b/fs/udf/super.c index b564fc140fe4..260f4b82c799 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; + sb->s_export_op = &udf_export_ops; sb->dq_op = NULL; sb->s_dirt = 0; sb->s_magic = UDF_SUPER_MAGIC; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index f3f45d029277..8fa9c2d70911 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -73,6 +73,7 @@ struct task_struct; struct buffer_head; struct super_block; +extern const struct export_operations udf_export_ops; extern const struct inode_operations udf_dir_inode_operations; extern const struct file_operations udf_dir_operations; extern const struct inode_operations udf_file_inode_operations; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index de8387b7ceb6..f5abd1306638 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -33,6 +33,19 @@ enum fid_type { * 32 bit parent directory inode number. */ FILEID_INO32_GEN_PARENT = 2, + + /* + * 32 bit block number, 16 bit partition reference, + * 16 bit unused, 32 bit generation number. + */ + FILEID_UDF_WITHOUT_PARENT = 0x51, + + /* + * 32 bit block number, 16 bit partition reference, + * 16 bit unused, 32 bit generation number, + * 32 bit parent block number, 32 bit parent generation number + */ + FILEID_UDF_WITH_PARENT = 0x52, }; struct fid { @@ -43,6 +56,14 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; + struct { + u32 block; + u16 partref; + u16 parent_partref; + u32 generation; + u32 parent_block; + u32 parent_generation; + } udf; __u32 raw[0]; }; }; -- cgit v1.2.3 From 6d63c275572d1e6f00d4fa154f16fbb0d8c2d2bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2008 09:51:23 +0200 Subject: cfq-iosched: make io priorities inherit CPU scheduling class as well as nice We currently set all processes to the best-effort scheduling class, regardless of what CPU scheduling class they belong to. Improve that so that we correctly track idle and rt scheduling classes as well. Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 4 ++-- include/linux/ioprio.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7f909d2f4886..b399c62936e0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1303,10 +1303,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); case IOPRIO_CLASS_NONE: /* - * no prio set, place us in the middle of the BE classes + * no prio set, inherit CPU scheduling settings */ cfqq->ioprio = task_nice_ioprio(tsk); - cfqq->ioprio_class = IOPRIO_CLASS_BE; + cfqq->ioprio_class = task_nice_ioclass(tsk); break; case IOPRIO_CLASS_RT: cfqq->ioprio = task_ioprio(ioc); diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 2a3bb1bb7433..f98a656b17e5 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -67,6 +67,20 @@ static inline int task_nice_ioprio(struct task_struct *task) return (task_nice(task) + 20) / 5; } +/* + * This is for the case where the task hasn't asked for a specific IO class. + * Check for idle and rt task process, and return appropriate IO class. + */ +static inline int task_nice_ioclass(struct task_struct *task) +{ + if (task->policy == SCHED_IDLE) + return IOPRIO_CLASS_IDLE; + else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR) + return IOPRIO_CLASS_RT; + else + return IOPRIO_CLASS_BE; +} + /* * For inheritance, return the highest of the two given priorities */ -- cgit v1.2.3 From 28f13702f03e527fcb979747a882cf366c489c50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2008 10:15:46 +0200 Subject: block: avoid duplicate calls to get_part() in disk stat code get_part() is fairly expensive, as it O(N) loops over partitions to find the right one. In lots of normal IO paths we end up looking up the partition twice, to make matters even worse. Change the stat add code to accept a passed in partition instead. Signed-off-by: Jens Axboe --- block/blk-core.c | 18 ++++++++++-------- drivers/block/aoe/aoecmd.c | 10 ++++++---- include/linux/genhd.h | 35 ++++++++++++++++++----------------- 3 files changed, 34 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 1b7dddf94f4f..2987fe47b5ee 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -54,15 +54,16 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); static void drive_stat_acct(struct request *rq, int new_io) { + struct hd_struct *part; int rw = rq_data_dir(rq); if (!blk_fs_request(rq) || !rq->rq_disk) return; - if (!new_io) { - __all_stat_inc(rq->rq_disk, merges[rw], rq->sector); - } else { - struct hd_struct *part = get_part(rq->rq_disk, rq->sector); + part = get_part(rq->rq_disk, rq->sector); + if (!new_io) + __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); + else { disk_round_stats(rq->rq_disk); rq->rq_disk->in_flight++; if (part) { @@ -1538,10 +1539,11 @@ static int __end_that_request_first(struct request *req, int error, } if (blk_fs_request(req) && req->rq_disk) { + struct hd_struct *part = get_part(req->rq_disk, req->sector); const int rw = rq_data_dir(req); - all_stat_add(req->rq_disk, sectors[rw], - nr_bytes >> 9, req->sector); + all_stat_add(req->rq_disk, part, sectors[rw], + nr_bytes >> 9, req->sector); } total_bytes = bio_nbytes = 0; @@ -1727,8 +1729,8 @@ static void end_that_request_last(struct request *req, int error) const int rw = rq_data_dir(req); struct hd_struct *part = get_part(disk, req->sector); - __all_stat_inc(disk, ios[rw], req->sector); - __all_stat_add(disk, ticks[rw], duration, req->sector); + __all_stat_inc(disk, part, ios[rw], req->sector); + __all_stat_add(disk, part, ticks[rw], duration, req->sector); disk_round_stats(disk); disk->in_flight--; if (part) { diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 8fc429cf82b6..41f818be2f7e 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -755,11 +755,13 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector { unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); + struct hd_struct *part; - all_stat_inc(disk, ios[rw], sector); - all_stat_add(disk, ticks[rw], duration, sector); - all_stat_add(disk, sectors[rw], n_sect, sector); - all_stat_add(disk, io_ticks, duration, sector); + part = get_part(disk, sector); + all_stat_inc(disk, part, ios[rw], sector); + all_stat_add(disk, part, ticks[rw], duration, sector); + all_stat_add(disk, part, sectors[rw], n_sect, sector); + all_stat_add(disk, part, io_ticks, duration, sector); } void diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ecd2bf63fc84..e9874e7fcdf9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -178,17 +178,17 @@ static inline struct hd_struct *get_part(struct gendisk *gendiskp, static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { int i; + for_each_possible_cpu(i) memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof (struct disk_stats)); + sizeof(struct disk_stats)); } #define __part_stat_add(part, field, addnd) \ (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) -#define __all_stat_add(gendiskp, field, addnd, sector) \ +#define __all_stat_add(gendiskp, part, field, addnd, sector) \ ({ \ - struct hd_struct *part = get_part(gendiskp, sector); \ if (part) \ __part_stat_add(part, field, addnd); \ __disk_stat_add(gendiskp, field, addnd); \ @@ -203,11 +203,13 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { res; \ }) -static inline void part_stat_set_all(struct hd_struct *part, int value) { +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ int i; + for_each_possible_cpu(i) memset(per_cpu_ptr(part->dkstats, i), value, - sizeof(struct disk_stats)); + sizeof(struct disk_stats)); } #else /* !CONFIG_SMP */ @@ -223,9 +225,8 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) #define __part_stat_add(part, field, addnd) \ (part->dkstats.field += addnd) -#define __all_stat_add(gendiskp, field, addnd, sector) \ +#define __all_stat_add(gendiskp, part, field, addnd, sector) \ ({ \ - struct hd_struct *part = get_part(gendiskp, sector); \ if (part) \ part->dkstats.field += addnd; \ __disk_stat_add(gendiskp, field, addnd); \ @@ -276,10 +277,10 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #define part_stat_sub(gendiskp, field, subnd) \ part_stat_add(gendiskp, field, -subnd) -#define all_stat_add(gendiskp, field, addnd, sector) \ +#define all_stat_add(gendiskp, part, field, addnd, sector) \ do { \ preempt_disable(); \ - __all_stat_add(gendiskp, field, addnd, sector); \ + __all_stat_add(gendiskp, part, field, addnd, sector); \ preempt_enable(); \ } while (0) @@ -288,15 +289,15 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #define all_stat_dec(gendiskp, field, sector) \ all_stat_add(gendiskp, field, -1, sector) -#define __all_stat_inc(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, 1, sector) -#define all_stat_inc(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, 1, sector) +#define __all_stat_inc(gendiskp, part, field, sector) \ + __all_stat_add(gendiskp, part, field, 1, sector) +#define all_stat_inc(gendiskp, part, field, sector) \ + all_stat_add(gendiskp, part, field, 1, sector) -#define __all_stat_sub(gendiskp, field, subnd, sector) \ - __all_stat_add(gendiskp, field, -subnd, sector) -#define all_stat_sub(gendiskp, field, subnd, sector) \ - all_stat_add(gendiskp, field, -subnd, sector) +#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ + __all_stat_add(gendiskp, part, field, -subnd, sector) +#define all_stat_sub(gendiskp, part, field, subnd, sector) \ + all_stat_add(gendiskp, part, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP -- cgit v1.2.3 From ef75d49f116bccbb80bccd423ecf3cb86c4509a5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 8 May 2008 01:15:21 -0700 Subject: netfilter: nf_conntrack_sip: restrict RTP expect flushing on error to last request Some Inovaphone PBXs exhibit very stange behaviour: when dialing for example "123", the device sends INVITE requests for "1", "12" and "123" back to back. The first requests will elicit error responses from the receiver, causing the SIP helper to flush the RTP expectations even though we might still see a positive response. Note the sequence number of the last INVITE request that contained a media description and only flush the expectations when receiving a negative response for that sequence number. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 1 + net/netfilter/nf_conntrack_sip.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 5da04e586a3f..23aa2ec6b7b7 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -7,6 +7,7 @@ struct nf_ct_sip_master { unsigned int register_cseq; + unsigned int invite_cseq; }; enum sip_expectation_classes { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 9f4900069561..2f9bbc058b48 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -870,6 +870,7 @@ static int process_sdp(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -959,6 +960,9 @@ static int process_sdp(struct sk_buff *skb, if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); + if (ret == NF_ACCEPT && i > 0) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; } static int process_invite_response(struct sk_buff *skb, @@ -967,14 +971,14 @@ static int process_invite_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_update_response(struct sk_buff *skb, @@ -983,14 +987,14 @@ static int process_update_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_prack_response(struct sk_buff *skb, @@ -999,14 +1003,14 @@ static int process_prack_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_bye_request(struct sk_buff *skb, -- cgit v1.2.3 From 148c69b4b0ec267b08d3619651ae4a10a1768b04 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 May 2008 15:31:54 +0100 Subject: MN10300: Make cpu_relax() invoke barrier() Make cpu_relax() invoke barrier() to be the same as other arches. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-mn10300/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mn10300/processor.h b/include/asm-mn10300/processor.h index f1b081f53468..73239271873d 100644 --- a/include/asm-mn10300/processor.h +++ b/include/asm-mn10300/processor.h @@ -58,7 +58,7 @@ extern struct mn10300_cpuinfo boot_cpu_data; extern void identify_cpu(struct mn10300_cpuinfo *); extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); -#define cpu_relax() do {} while (0) +#define cpu_relax() barrier() /* * User space process size: 1.75GB (default). -- cgit v1.2.3