From f69767a1ada3ac74be2e1ac0795a05e1d1384eff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:50 -0500 Subject: PCI: Add TLP Processing Hints (TPH) support Add support for PCIe TLP Processing Hints (TPH) support (see PCIe r6.2, sec 6.17). Add TPH register definitions in pci_regs.h, including the TPH Requester capability register, TPH Requester control register, TPH Completer capability, and the ST fields of MSI-X entry. Introduce pcie_enable_tph() and pcie_disable_tph(), enabling drivers to toggle TPH support and configure specific ST mode as needed. Also add a new kernel parameter, "pci=notph", allowing users to disable TPH support across the entire system. Link: https://lore.kernel.org/r/20241002165954.128085-2-wei.huang2@amd.com Co-developed-by: Jing Liu Co-developed-by: Paul Luse Co-developed-by: Eric Van Tassell Signed-off-by: Jing Liu Signed-off-by: Paul Luse Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner --- include/linux/pci-tph.h | 21 +++++++++++++++++++++ include/linux/pci.h | 7 +++++++ 2 files changed, 28 insertions(+) create mode 100644 include/linux/pci-tph.h (limited to 'include/linux') diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h new file mode 100644 index 000000000000..58654a334ffb --- /dev/null +++ b/include/linux/pci-tph.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TPH (TLP Processing Hints) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * Eric Van Tassell + * Wei Huang + */ +#ifndef LINUX_PCI_TPH_H +#define LINUX_PCI_TPH_H + +#ifdef CONFIG_PCIE_TPH +void pcie_disable_tph(struct pci_dev *pdev); +int pcie_enable_tph(struct pci_dev *pdev, int mode); +#else +static inline void pcie_disable_tph(struct pci_dev *pdev) { } +static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ return -EINVAL; } +#endif + +#endif /* LINUX_PCI_TPH_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..8351d76b6e12 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -434,6 +434,7 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ + unsigned int tph_enabled:1; /* TLP Processing Hints */ unsigned int is_managed:1; /* Managed via devres */ unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ @@ -534,6 +535,12 @@ struct pci_dev { /* These methods index pci_reset_fn_methods[] */ u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */ + +#ifdef CONFIG_PCIE_TPH + u16 tph_cap; /* TPH capability offset */ + u8 tph_mode; /* TPH mode */ + u8 tph_req_type; /* TPH requester type */ +#endif }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) -- cgit v1.2.3 From d2e8a34876ce69b27f450eebfc550ab8e316f752 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:51 -0500 Subject: PCI/TPH: Add Steering Tag support Add pcie_tph_get_cpu_st() to allow a caller to retrieve Steering Tags for a target memory associated with a specific CPU. The ST tag is retrieved by invoking PCI ACPI "_DSM to Query Cache Locality TPH Features" method (rev=0x7, func=0xF) of the device's Root Port device. Add pcie_tph_set_st_entry() to update the device's Steering Tags. The tags will be written into the device's MSI-X table or the ST table located in the TPH Extended Capability space. Co-developed-by: Eric Van Tassell Link: https://lore.kernel.org/r/20241002165954.128085-3-wei.huang2@amd.com Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek --- drivers/pci/tph.c | 352 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/pci-tph.h | 23 ++++ 2 files changed, 374 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 4d6317cbf8a6..1e604fbbda65 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -7,6 +7,8 @@ * Wei Huang */ #include +#include +#include #include #include @@ -15,6 +17,134 @@ /* System-wide TPH disabled */ static bool pci_tph_disabled; +#ifdef CONFIG_ACPI +/* + * The st_info struct defines the Steering Tag (ST) info returned by the + * firmware PCI ACPI _DSM method (rev=0x7, func=0xF, "_DSM to Query Cache + * Locality TPH Features"), as specified in the approved ECN for PCI Firmware + * Spec and available at https://members.pcisig.com/wg/PCI-SIG/document/15470. + * + * @vm_st_valid: 8-bit ST for volatile memory is valid + * @vm_xst_valid: 16-bit extended ST for volatile memory is valid + * @vm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied + * @vm_st: 8-bit ST for volatile mem + * @vm_xst: 16-bit extended ST for volatile mem + * @pm_st_valid: 8-bit ST for persistent memory is valid + * @pm_xst_valid: 16-bit extended ST for persistent memory is valid + * @pm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied + * @pm_st: 8-bit ST for persistent mem + * @pm_xst: 16-bit extended ST for persistent mem + */ +union st_info { + struct { + u64 vm_st_valid : 1; + u64 vm_xst_valid : 1; + u64 vm_ph_ignore : 1; + u64 rsvd1 : 5; + u64 vm_st : 8; + u64 vm_xst : 16; + u64 pm_st_valid : 1; + u64 pm_xst_valid : 1; + u64 pm_ph_ignore : 1; + u64 rsvd2 : 5; + u64 pm_st : 8; + u64 pm_xst : 16; + }; + u64 value; +}; + +static u16 tph_extract_tag(enum tph_mem_type mem_type, u8 req_type, + union st_info *info) +{ + switch (req_type) { + case PCI_TPH_REQ_TPH_ONLY: /* 8-bit tag */ + switch (mem_type) { + case TPH_MEM_TYPE_VM: + if (info->vm_st_valid) + return info->vm_st; + break; + case TPH_MEM_TYPE_PM: + if (info->pm_st_valid) + return info->pm_st; + break; + } + break; + case PCI_TPH_REQ_EXT_TPH: /* 16-bit tag */ + switch (mem_type) { + case TPH_MEM_TYPE_VM: + if (info->vm_xst_valid) + return info->vm_xst; + break; + case TPH_MEM_TYPE_PM: + if (info->pm_xst_valid) + return info->pm_xst; + break; + } + break; + default: + return 0; + } + + return 0; +} + +#define TPH_ST_DSM_FUNC_INDEX 0xF +static acpi_status tph_invoke_dsm(acpi_handle handle, u32 cpu_uid, + union st_info *st_out) +{ + union acpi_object arg3[3], in_obj, *out_obj; + + if (!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 7, + BIT(TPH_ST_DSM_FUNC_INDEX))) + return AE_ERROR; + + /* DWORD: feature ID (0 for processor cache ST query) */ + arg3[0].integer.type = ACPI_TYPE_INTEGER; + arg3[0].integer.value = 0; + + /* DWORD: target UID */ + arg3[1].integer.type = ACPI_TYPE_INTEGER; + arg3[1].integer.value = cpu_uid; + + /* QWORD: properties, all 0's */ + arg3[2].integer.type = ACPI_TYPE_INTEGER; + arg3[2].integer.value = 0; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = ARRAY_SIZE(arg3); + in_obj.package.elements = arg3; + + out_obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 7, + TPH_ST_DSM_FUNC_INDEX, &in_obj); + if (!out_obj) + return AE_ERROR; + + if (out_obj->type != ACPI_TYPE_BUFFER) { + ACPI_FREE(out_obj); + return AE_ERROR; + } + + st_out->value = *((u64 *)(out_obj->buffer.pointer)); + + ACPI_FREE(out_obj); + + return AE_OK; +} +#endif + +/* Update the TPH Requester Enable field of TPH Control Register */ +static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, ®); + + reg &= ~PCI_TPH_CTRL_REQ_EN_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, req_type); + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); +} + static u8 get_st_modes(struct pci_dev *pdev) { u32 reg; @@ -25,6 +155,37 @@ static u8 get_st_modes(struct pci_dev *pdev) return reg; } +static u32 get_st_table_loc(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + + return FIELD_GET(PCI_TPH_CAP_LOC_MASK, reg); +} + +/* + * Return the size of ST table. If ST table is not in TPH Requester Extended + * Capability space, return 0. Otherwise return the ST Table Size + 1. + */ +static u16 get_st_table_size(struct pci_dev *pdev) +{ + u32 reg; + u32 loc; + + /* Check ST table location first */ + loc = get_st_table_loc(pdev); + + /* Convert loc to match with PCI_TPH_LOC_* defined in pci_regs.h */ + loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); + if (loc != PCI_TPH_LOC_CAP) + return 0; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + + return FIELD_GET(PCI_TPH_CAP_ST_MASK, reg) + 1; +} + /* Return device's Root Port completer capability */ static u8 get_rp_completer_type(struct pci_dev *pdev) { @@ -43,6 +204,171 @@ static u8 get_rp_completer_type(struct pci_dev *pdev) return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); } +/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */ +static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag) +{ +#ifdef CONFIG_PCI_MSI + struct msi_desc *msi_desc = NULL; + void __iomem *vec_ctrl; + u32 val; + int err = 0; + + msi_lock_descs(&pdev->dev); + + /* Find the msi_desc entry with matching msix_idx */ + msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) { + if (msi_desc->msi_index == msix_idx) + break; + } + + if (!msi_desc) { + err = -ENXIO; + goto err_out; + } + + /* Get the vector control register (offset 0xc) pointed by msix_idx */ + vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE; + vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL; + + val = readl(vec_ctrl); + val &= ~PCI_MSIX_ENTRY_CTRL_ST; + val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); + writel(val, vec_ctrl); + + /* Read back to flush the update */ + val = readl(vec_ctrl); + +err_out: + msi_unlock_descs(&pdev->dev); + return err; +#else + return -ENODEV; +#endif +} + +/* Write tag to ST table - Return 0 if OK, otherwise -errno */ +static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag) +{ + int st_table_size; + int offset; + + /* Check if index is out of bound */ + st_table_size = get_st_table_size(pdev); + if (index >= st_table_size) + return -ENXIO; + + offset = pdev->tph_cap + PCI_TPH_BASE_SIZEOF + index * sizeof(u16); + + return pci_write_config_word(pdev, offset, tag); +} + +/** + * pcie_tph_get_cpu_st() - Retrieve Steering Tag for a target memory associated + * with a specific CPU + * @pdev: PCI device + * @mem_type: target memory type (volatile or persistent RAM) + * @cpu_uid: associated CPU id + * @tag: Steering Tag to be returned + * + * Return the Steering Tag for a target memory that is associated with a + * specific CPU as indicated by cpu_uid. + * + * Return: 0 if success, otherwise negative value (-errno) + */ +int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ +#ifdef CONFIG_ACPI + struct pci_dev *rp; + acpi_handle rp_acpi_handle; + union st_info info; + + rp = pcie_find_root_port(pdev); + if (!rp || !rp->bus || !rp->bus->bridge) + return -ENODEV; + + rp_acpi_handle = ACPI_HANDLE(rp->bus->bridge); + + if (tph_invoke_dsm(rp_acpi_handle, cpu_uid, &info) != AE_OK) { + *tag = 0; + return -EINVAL; + } + + *tag = tph_extract_tag(mem_type, pdev->tph_req_type, &info); + + pci_dbg(pdev, "get steering tag: mem_type=%s, cpu_uid=%d, tag=%#04x\n", + (mem_type == TPH_MEM_TYPE_VM) ? "volatile" : "persistent", + cpu_uid, *tag); + + return 0; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(pcie_tph_get_cpu_st); + +/** + * pcie_tph_set_st_entry() - Set Steering Tag in the ST table entry + * @pdev: PCI device + * @index: ST table entry index + * @tag: Steering Tag to be written + * + * Figure out the proper location of ST table, either in the MSI-X table or + * in the TPH Extended Capability space, and write the Steering Tag into + * the ST entry pointed by index. + * + * Return: 0 if success, otherwise negative value (-errno) + */ +int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + u32 loc; + int err = 0; + + if (!pdev->tph_cap) + return -EINVAL; + + if (!pdev->tph_enabled) + return -EINVAL; + + /* No need to write tag if device is in "No ST Mode" */ + if (pdev->tph_mode == PCI_TPH_ST_NS_MODE) + return 0; + + /* + * Disable TPH before updating ST to avoid potential instability as + * cautioned in PCIe r6.2, sec 6.17.3, "ST Modes of Operation" + */ + set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE); + + loc = get_st_table_loc(pdev); + /* Convert loc to match with PCI_TPH_LOC_* */ + loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); + + switch (loc) { + case PCI_TPH_LOC_MSIX: + err = write_tag_to_msix(pdev, index, tag); + break; + case PCI_TPH_LOC_CAP: + err = write_tag_to_st_table(pdev, index, tag); + break; + default: + err = -EINVAL; + } + + if (err) { + pcie_disable_tph(pdev); + return err; + } + + set_ctrl_reg_req_en(pdev, pdev->tph_mode); + + pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n", + (loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag); + + return 0; +} +EXPORT_SYMBOL(pcie_tph_set_st_entry); + /** * pcie_disable_tph - Turn off TPH support for device * @pdev: PCI device @@ -140,6 +466,8 @@ EXPORT_SYMBOL(pcie_enable_tph); void pci_restore_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; + int num_entries, i, offset; + u16 *st_entry; u32 *cap; if (!pdev->tph_cap) @@ -155,11 +483,21 @@ void pci_restore_tph_state(struct pci_dev *pdev) /* Restore control register and all ST entries */ cap = &save_state->cap.data[0]; pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *cap++); + st_entry = (u16 *)cap; + offset = PCI_TPH_BASE_SIZEOF; + num_entries = get_st_table_size(pdev); + for (i = 0; i < num_entries; i++) { + pci_write_config_word(pdev, pdev->tph_cap + offset, + *st_entry++); + offset += sizeof(u16); + } } void pci_save_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; + int num_entries, i, offset; + u16 *st_entry; u32 *cap; if (!pdev->tph_cap) @@ -175,6 +513,16 @@ void pci_save_tph_state(struct pci_dev *pdev) /* Save control register */ cap = &save_state->cap.data[0]; pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, cap++); + + /* Save all ST entries in extended capability structure */ + st_entry = (u16 *)cap; + offset = PCI_TPH_BASE_SIZEOF; + num_entries = get_st_table_size(pdev); + for (i = 0; i < num_entries; i++) { + pci_read_config_word(pdev, pdev->tph_cap + offset, + st_entry++); + offset += sizeof(u16); + } } void pci_no_tph(void) @@ -186,12 +534,14 @@ void pci_no_tph(void) void pci_tph_init(struct pci_dev *pdev) { + int num_entries; u32 save_size; pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH); if (!pdev->tph_cap) return; - save_size = sizeof(u32); + num_entries = get_st_table_size(pdev); + save_size = sizeof(u32) + num_entries * sizeof(u16); pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, save_size); } diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index 58654a334ffb..c3e806c13d64 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -9,10 +9,33 @@ #ifndef LINUX_PCI_TPH_H #define LINUX_PCI_TPH_H +/* + * According to the ECN for PCI Firmware Spec, Steering Tag can be different + * depending on the memory type: Volatile Memory or Persistent Memory. When a + * caller query about a target's Steering Tag, it must provide the target's + * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470. + */ +enum tph_mem_type { + TPH_MEM_TYPE_VM, /* volatile memory */ + TPH_MEM_TYPE_PM /* persistent memory */ +}; + #ifdef CONFIG_PCIE_TPH +int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag); +int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); #else +static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag) +{ return -EINVAL; } +static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ return -EINVAL; } static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } -- cgit v1.2.3 From 9fb6fef0fb49124291837af1da5028f79d53f98e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 14 Jun 2024 13:06:03 +0300 Subject: resource: Add resource set range and size helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the end address for a resource with a given size lacks a helper and is therefore coded manually unlike the getter side which has a helper for resource size calculation. Also, almost all callsites that calculate the end address for a resource also set the start address right before it like this: res->start = start_addr; res->end = res->start + size - 1; Add resource_set_range(res, start_addr, size) that sets the start address and calculates the end address to simplify this often repeated fragment. Also add resource_set_size() for the cases where setting the start address of the resource is not necessary but mention in its kerneldoc that resource_set_range() is preferred when setting both addresses. Link: https://lore.kernel.org/r/20240614100606.15830-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/ioport.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6e9fb667a1c5..5385349f0b8a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); + +/** + * resource_set_size - Calculate resource end address from size and start + * @res: Resource descriptor + * @size: Size of the resource + * + * Calculate the end address for @res based on @size. + * + * Note: The start address of @res must be set when calling this function. + * Prefer resource_set_range() if setting both the start address and @size. + */ +static inline void resource_set_size(struct resource *res, resource_size_t size) +{ + res->end = res->start + size - 1; +} + +/** + * resource_set_range - Set resource start and end addresses + * @res: Resource descriptor + * @start: Start address for the resource + * @size: Size of the resource + * + * Set @res start address and calculate the end address based on @size. + */ +static inline void resource_set_range(struct resource *res, + resource_size_t start, + resource_size_t size) +{ + res->start = start; + resource_set_size(res, size); +} + static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; -- cgit v1.2.3 From ce1dfe6d328966b75821c1f043a940eb2569768a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:43 +0900 Subject: PCI: endpoint: Introduce pci_epc_mem_map()/unmap() Some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region. For instance, the endpoint controller of the RK3399 SoC uses at most the lower 20 bits of a physical memory address region as the lower bits of a RC PCI address region. For mapping a PCI address region of size bytes starting from pci_addr, the exact number of address bits used is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1]. For this example, this creates the following constraints: 1) The offset into the controller physical memory allocated for a mapping depends on the mapping size *and* the starting PCI address for the mapping. 2) A mapping size cannot exceed the controller windows size (1MB) minus the offset needed into the allocated physical memory, which can end up being a smaller size than the desired mapping size. Handling these constraints independently of the controller being used in an endpoint function driver is not possible with the current EPC API as only the ->align field in struct pci_epc_features is provided but used for BAR (inbound ATU mappings) mapping only. A new API is needed for function drivers to discover mapping constraints and handle non-static requirements based on the RC PCI address range to access. Introduce the endpoint controller operation ->align_addr() to allow the EPC core functions to obtain the size and the offset into a controller address region that must be allocated and mapped to access a RC PCI address region. The size of the mapping provided by the align_addr() operation can then be used as the size argument for the function pci_epc_mem_alloc_addr() and the offset into the allocated controller memory provided can be used to correctly handle data transfers. For endpoint controllers that have PCI address alignment constraints, the align_addr() operation may indicate upon return an effective PCI address mapping size that is smaller (but not 0) than the requested PCI address region size. The controller ->align_addr() operation is optional: controllers that do not have any alignment constraints for mapping RC PCI address regions do not need to implement this operation. For such controllers, it is always assumed that the mapping size is equal to the requested size of the PCI region and that the mapping offset is 0. The function pci_epc_mem_map() is introduced to use this new controller operation (if it is defined) to handle controller memory allocation and mapping to a RC PCI address region in endpoint function drivers. This function first uses the ->align_addr() controller operation to determine the controller memory address size (and offset into) needed for mapping an RC PCI address region. The result of this operation is used to allocate a controller physical memory region using pci_epc_mem_alloc_addr() and then to map that memory to the RC PCI address space with pci_epc_map_addr(). Since ->align_addr() () may indicate that not all of a RC PCI address region can be mapped, pci_epc_mem_map() may only partially map the RC PCI address region specified. It is the responsibility of the caller (an endpoint function driver) to handle such smaller mapping by repeatedly using pci_epc_mem_map() over the desried PCI address range. The counterpart of pci_epc_mem_map() to unmap and free a mapped controller memory address region is pci_epc_mem_unmap(). Both functions operate using the new struct pci_epc_map data structure. This new structure represents a mapping PCI address, mapping effective size, the size of the controller memory needed for the mapping as well as the physical and virtual CPU addresses of the mapping (phys_base and virt_base fields). For convenience, the physical and virtual CPU addresses within that mapping to use to access the target RC PCI address region are also provided (phys_addr and virt_addr fields). Endpoint function drivers can use struct pci_epc_map to access the mapped RC PCI address region using the ->virt_addr and ->pci_size fields. Co-developed-by: Rick Wertenbroek Signed-off-by: Rick Wertenbroek Signed-off-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-4-dlemoal@kernel.org [mani: squashed the patch that changed phy_addr_t to u64] Signed-off-by: Manivannan Sadhasivam --- drivers/pci/endpoint/pci-epc-core.c | 103 ++++++++++++++++++++++++++++++++++++ include/linux/pci-epc.h | 38 +++++++++++++ 2 files changed, 141 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index b854f1bab26f..04a85d2f7e2a 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -466,6 +466,109 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, } EXPORT_SYMBOL_GPL(pci_epc_map_addr); +/** + * pci_epc_mem_map() - allocate and map a PCI address to a CPU address + * @epc: the EPC device on which the CPU address is to be allocated and mapped + * @func_no: the physical endpoint function number in the EPC device + * @vfunc_no: the virtual endpoint function number in the physical function + * @pci_addr: PCI address to which the CPU address should be mapped + * @pci_size: the number of bytes to map starting from @pci_addr + * @map: where to return the mapping information + * + * Allocate a controller memory address region and map it to a RC PCI address + * region, taking into account the controller physical address mapping + * constraints using the controller operation align_addr(). If this operation is + * not defined, we assume that there are no alignment constraints for the + * mapping. + * + * The effective size of the PCI address range mapped from @pci_addr is + * indicated by @map->pci_size. This size may be less than the requested + * @pci_size. The local virtual CPU address for the mapping is indicated by + * @map->virt_addr (@map->phys_addr indicates the physical address). + * The size and CPU address of the controller memory allocated and mapped are + * respectively indicated by @map->map_size and @map->virt_base (and + * @map->phys_base for the physical address of @map->virt_base). + * + * Returns 0 on success and a negative error code in case of error. + */ +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map) +{ + size_t map_size = pci_size; + size_t map_offset = 0; + int ret; + + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return -EINVAL; + + if (!pci_size || !map) + return -EINVAL; + + /* + * Align the PCI address to map. If the controller defines the + * .align_addr() operation, use it to determine the PCI address to map + * and the size of the mapping. Otherwise, assume that the controller + * has no alignment constraint. + */ + memset(map, 0, sizeof(*map)); + map->pci_addr = pci_addr; + if (epc->ops->align_addr) + map->map_pci_addr = + epc->ops->align_addr(epc, pci_addr, + &map_size, &map_offset); + else + map->map_pci_addr = pci_addr; + map->map_size = map_size; + if (map->map_pci_addr + map->map_size < pci_addr + pci_size) + map->pci_size = map->map_pci_addr + map->map_size - pci_addr; + else + map->pci_size = pci_size; + + map->virt_base = pci_epc_mem_alloc_addr(epc, &map->phys_base, + map->map_size); + if (!map->virt_base) + return -ENOMEM; + + map->phys_addr = map->phys_base + map_offset; + map->virt_addr = map->virt_base + map_offset; + + ret = pci_epc_map_addr(epc, func_no, vfunc_no, map->phys_base, + map->map_pci_addr, map->map_size); + if (ret) { + pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base, + map->map_size); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(pci_epc_mem_map); + +/** + * pci_epc_mem_unmap() - unmap and free a CPU address region + * @epc: the EPC device on which the CPU address is allocated and mapped + * @func_no: the physical endpoint function number in the EPC device + * @vfunc_no: the virtual endpoint function number in the physical function + * @map: the mapping information + * + * Unmap and free a CPU address region that was allocated and mapped with + * pci_epc_mem_map(). + */ +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map) +{ + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return; + + if (!map || !map->virt_base) + return; + + pci_epc_unmap_addr(epc, func_no, vfunc_no, map->phys_base); + pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base, + map->map_size); +} +EXPORT_SYMBOL_GPL(pci_epc_mem_unmap); + /** * pci_epc_clear_bar() - reset the BAR * @epc: the EPC device for which the BAR has to be cleared diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 42ef06136bd1..de8cc3658220 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -32,11 +32,43 @@ pci_epc_interface_string(enum pci_epc_interface_type type) } } +/** + * struct pci_epc_map - information about EPC memory for mapping a RC PCI + * address range + * @pci_addr: start address of the RC PCI address range to map + * @pci_size: size of the RC PCI address range mapped from @pci_addr + * @map_pci_addr: RC PCI address used as the first address mapped (may be lower + * than @pci_addr) + * @map_size: size of the controller memory needed for mapping the RC PCI address + * range @pci_addr..@pci_addr+@pci_size + * @phys_base: base physical address of the allocated EPC memory for mapping the + * RC PCI address range + * @phys_addr: physical address at which @pci_addr is mapped + * @virt_base: base virtual address of the allocated EPC memory for mapping the + * RC PCI address range + * @virt_addr: virtual address at which @pci_addr is mapped + */ +struct pci_epc_map { + u64 pci_addr; + size_t pci_size; + + u64 map_pci_addr; + size_t map_size; + + phys_addr_t phys_base; + phys_addr_t phys_addr; + void __iomem *virt_base; + void __iomem *virt_addr; +}; + /** * struct pci_epc_ops - set of function pointers for performing EPC operations * @write_header: ops to populate configuration space header * @set_bar: ops to configure the BAR * @clear_bar: ops to reset the BAR + * @align_addr: operation to get the mapping address, mapping size and offset + * into a controller memory window needed to map an RC PCI address + * region * @map_addr: ops to map CPU address to PCI address * @unmap_addr: ops to unmap CPU address and PCI address * @set_msi: ops to set the requested number of MSI interrupts in the MSI @@ -61,6 +93,8 @@ struct pci_epc_ops { struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); + u64 (*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size, + size_t *offset); int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr, u64 pci_addr, size_t size); void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, @@ -278,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size); +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map); +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map); #else static inline void pci_epc_init_notify(struct pci_epc *epc) -- cgit v1.2.3 From 6eaa83ec229b1e99130456c4f6658430d509c76c Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 17 Oct 2024 17:11:08 +0300 Subject: PCI: Remove unused PCI_SUBTRACTIVE_DECODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2fe2abf896c1 ("PCI: augment bus resource table with a list") added PCI_SUBTRACTIVE_DECODE which is put into the struct pci_bus_resource flags field but is never read. There seems to never have been users for it. Remove both PCI_SUBTRACTIVE_DECODE and the flags field from the struct pci_bus_resource. Link: https://lore.kernel.org/r/20241017141111.44612-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- arch/s390/pci/pci_bus.c | 2 +- arch/x86/pci/fixup.c | 2 +- drivers/pci/bus.c | 4 +--- drivers/pci/probe.c | 5 ++--- include/linux/pci.h | 12 +----------- 5 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index daa5d7450c7d..5630af5deb8b 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -53,7 +53,7 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) zpci_setup_bus_resources(zdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (zdev->bars[i].res) - pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0); + pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res); } } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 98a9bb92d75c..0681ecfe3430 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -757,7 +757,7 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", res); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - pci_bus_add_resource(dev->bus, res, 0); + pci_bus_add_resource(dev->bus, res); } base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 55c853686051..9cf6d0f3ab2b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -46,8 +46,7 @@ void pci_free_resource_list(struct list_head *resources) } EXPORT_SYMBOL(pci_free_resource_list); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags) +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res) { struct pci_bus_resource *bus_res; @@ -58,7 +57,6 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, } bus_res->res = res; - bus_res->flags = flags; list_add_tail(&bus_res->list, &bus->resources); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..4243b1e6ece2 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -546,8 +546,7 @@ void pci_read_bridge_bases(struct pci_bus *child) if (dev->transparent) { pci_bus_for_each_resource(child->parent, res) { if (res && res->flags) { - pci_bus_add_resource(child, res, - PCI_SUBTRACTIVE_DECODE); + pci_bus_add_resource(child, res); pci_info(dev, " bridge window %pR (subtractive decode)\n", res); } @@ -1032,7 +1031,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) if (res->flags & IORESOURCE_BUS) pci_bus_insert_busn_res(bus, bus->number, res->end); else - pci_bus_add_resource(bus, res, 0); + pci_bus_add_resource(bus, res); if (offset) { if (resource_type(res) == IORESOURCE_IO) diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..6a9cf80d0d4b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -633,18 +633,9 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); * Use pci_bus_for_each_resource() to iterate through all the resources. */ -/* - * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly - * and there's no way to program the bridge with the details of the window. - * This does not apply to ACPI _CRS windows, even with the _DEC subtractive- - * decode bit set, because they are explicit and can be programmed with _SRS. - */ -#define PCI_SUBTRACTIVE_DECODE 0x1 - struct pci_bus_resource { struct list_head list; struct resource *res; - unsigned int flags; }; #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ @@ -1498,8 +1489,7 @@ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset); void pci_free_resource_list(struct list_head *resources); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags); +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); -- cgit v1.2.3 From 469c9cb941480718db52876bbdb95a7a79b34889 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 17 Oct 2024 17:11:09 +0300 Subject: PCI: Move struct pci_bus_resource into bus.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct pci_bus_resource is only used in bus.c, so move it there. Link: https://lore.kernel.org/r/20241017141111.44612-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/bus.c | 12 ++++++++++++ include/linux/pci.h | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9cf6d0f3ab2b..e0a2441be6d3 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -18,6 +18,18 @@ #include "pci.h" +/* + * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond + * to P2P or CardBus bridge windows) go in a table. Additional ones (for + * buses below host bridges or subtractive decode bridges) go in the list. + * Use pci_bus_for_each_resource() to iterate through all the resources. + */ + +struct pci_bus_resource { + struct list_head list; + struct resource *res; +}; + void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 6a9cf80d0d4b..5a9d849b28ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,18 +626,6 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge, int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); -/* - * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond - * to P2P or CardBus bridge windows) go in a table. Additional ones (for - * buses below host bridges or subtractive decode bridges) go in the list. - * Use pci_bus_for_each_resource() to iterate through all the resources. - */ - -struct pci_bus_resource { - struct list_head list; - struct resource *res; -}; - #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ struct pci_bus { -- cgit v1.2.3 From 04af8a399fa40310f831b4f1dc9f757085f41983 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 18 Oct 2024 17:47:48 +0300 Subject: PCI: Protect Link Control 2 Register with RMW locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe Bandwidth Controller performs RMW accesses the Link Control 2 Register which can occur concurrently to other sources of Link Control 2 Register writes. Therefore, add Link Control 2 Register among the PCI Express Capability Registers that need RMW locking. Link: https://lore.kernel.org/r/20241018144755.7875-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron --- Documentation/PCI/pciebus-howto.rst | 1 + include/linux/pci.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst index e48d01422efc..375d9ce171f6 100644 --- a/Documentation/PCI/pciebus-howto.rst +++ b/Documentation/PCI/pciebus-howto.rst @@ -221,6 +221,7 @@ a selected set of PCI Express Capability Registers: * Link Control Register * Root Control Register +* Link Control 2 Register Any change to those registers should be performed using RMW accessors to avoid problems due to concurrent updates. For the up-to-date list of diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..be5ed534c39c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1274,6 +1274,7 @@ static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, { switch (pos) { case PCI_EXP_LNKCTL: + case PCI_EXP_LNKCTL2: case PCI_EXP_RTCTL: return pcie_capability_clear_and_set_word_locked(dev, pos, clear, set); -- cgit v1.2.3 From d9d959c36bec59f11c0eb6b5308729e3c4901b5e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:34 +0100 Subject: PCI: Make pcim_request_all_regions() a public function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to remove the deprecated function pcim_iomap_regions_request_all(), a few drivers need an interface to request all BARs a PCI device offers. Make pcim_request_all_regions() a public interface. Link: https://lore.kernel.org/r/20241030112743.104395-2-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal Reviewed-by: Ilpo Järvinen --- drivers/pci/devres.c | 3 ++- include/linux/pci.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index b133967faef8..2a64da5c91fb 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -939,7 +939,7 @@ static void pcim_release_all_regions(struct pci_dev *pdev) * desired, release individual regions with pcim_release_region() or all of * them at once with pcim_release_all_regions(). */ -static int pcim_request_all_regions(struct pci_dev *pdev, const char *name) +int pcim_request_all_regions(struct pci_dev *pdev, const char *name) { int ret; int bar; @@ -957,6 +957,7 @@ err: return ret; } +EXPORT_SYMBOL(pcim_request_all_regions); /** * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..3b151c8331e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2293,6 +2293,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } #endif +int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, const char *name); -- cgit v1.2.3 From 6d9c59212523e719a63575222f1cd1b0aca3da4f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 30 Oct 2024 12:27:43 +0100 Subject: PCI: Remove pcim_iomap_regions_request_all() pcim_iomap_regions_request_all() have been deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). All users of this function have been ported to other interfaces by now. Remove pcim_iomap_regions_request_all(). Link: https://lore.kernel.org/r/20241030112743.104395-11-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal --- Documentation/driver-api/driver-model/devres.rst | 1 - drivers/pci/devres.c | 56 ------------------------ include/linux/pci.h | 2 - 3 files changed, 59 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 5f2ee8d717b1..3a30cf4f6c0d 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -394,7 +394,6 @@ PCI pcim_enable_device() : after success, some PCI ops become managed pcim_iomap() : do iomap() on a single BAR pcim_iomap_regions() : do request_region() and iomap() on multiple BARs - pcim_iomap_regions_request_all() : do request_region() on all and iomap() on multiple BARs pcim_iomap_table() : array of mapped addresses indexed by BAR pcim_iounmap() : do iounmap() on a single BAR pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 2a64da5c91fb..319a477a2135 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -959,62 +959,6 @@ err: } EXPORT_SYMBOL(pcim_request_all_regions); -/** - * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones - * (DEPRECATED) - * @pdev: PCI device to map IO resources for - * @mask: Mask of BARs to iomap - * @name: Name associated with the requests - * - * Returns: 0 on success, negative error code on failure. - * - * Request all PCI BARs and iomap regions specified by @mask. - * - * To release these resources manually, call pcim_release_region() for the - * regions and pcim_iounmap() for the mappings. - * - * This function is DEPRECATED. Don't use it in new code. Instead, use one - * of the pcim_* region request functions in combination with a pcim_* - * mapping function. - */ -int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, - const char *name) -{ - int bar; - int ret; - void __iomem **legacy_iomap_table; - - ret = pcim_request_all_regions(pdev, name); - if (ret != 0) - return ret; - - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { - if (!mask_contains_bar(mask, bar)) - continue; - if (!pcim_iomap(pdev, bar, 0)) - goto err; - } - - return 0; - -err: - /* - * If bar is larger than 0, then pcim_iomap() above has most likely - * failed because of -EINVAL. If it is equal 0, most likely the table - * couldn't be created, indicating -ENOMEM. - */ - ret = bar > 0 ? -EINVAL : -ENOMEM; - legacy_iomap_table = (void __iomem **)pcim_iomap_table(pdev); - - while (--bar >= 0) - pcim_iounmap(pdev, legacy_iomap_table[bar]); - - pcim_release_all_regions(pdev); - - return ret; -} -EXPORT_SYMBOL(pcim_iomap_regions_request_all); - /** * pcim_iounmap_regions - Unmap and release PCI BARs * @pdev: PCI device to map IO resources for diff --git a/include/linux/pci.h b/include/linux/pci.h index 3b151c8331e5..b59197635c5c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2301,8 +2301,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, - const char *name); void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); -- cgit v1.2.3 From 4a6afd60733c75369680f4a40c82b7c8528f4a7a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 16 Oct 2024 11:49:04 +0200 Subject: PCI: Make pcim_iounmap_region() a public function The function pcim_iounmap_regions() is problematic because it uses a bitmask mechanism to release / iounmap multiple BARs at once. It, thus, prevents getting rid of the problematic iomap table mechanism which was deprecated in commit e354bb84a4c1 ("PCI: Deprecate pcim_iomap_table(), pcim_iomap_regions_request_all()"). pcim_iounmap_region() does not have that problem. Make it public as the successor of pcim_iounmap_regions(). Link: https://lore.kernel.org/r/20241016094911.24818-3-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas --- drivers/pci/devres.c | 3 ++- include/linux/pci.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 319a477a2135..fc50d2456aed 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -773,7 +773,7 @@ EXPORT_SYMBOL(pcim_iomap_region); * Unmap a BAR and release its region manually. Only pass BARs that were * previously mapped by pcim_iomap_region(). */ -static void pcim_iounmap_region(struct pci_dev *pdev, int bar) +void pcim_iounmap_region(struct pci_dev *pdev, int bar) { struct pcim_addr_devres res_searched; @@ -784,6 +784,7 @@ static void pcim_iounmap_region(struct pci_dev *pdev, int bar) devres_release(&pdev->dev, pcim_addr_resource_release, pcim_addr_resources_match, &res_searched); } +EXPORT_SYMBOL(pcim_iounmap_region); /** * pcim_iomap_regions - Request and iomap PCI BARs (DEPRECATED) diff --git a/include/linux/pci.h b/include/linux/pci.h index b59197635c5c..b8d248c136ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2297,6 +2297,7 @@ int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, const char *name); +void pcim_iounmap_region(struct pci_dev *pdev, int bar); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); -- cgit v1.2.3 From 3b96b895127b7c0aed63d82c974b46340e8466c1 Mon Sep 17 00:00:00 2001 From: Esther Shimanovich Date: Tue, 10 Sep 2024 17:57:45 +0000 Subject: PCI: Detect and trust built-in Thunderbolt chips Some computers with CPUs that lack Thunderbolt features use discrete Thunderbolt chips to add Thunderbolt functionality. These Thunderbolt chips are located within the chassis; between the Root Port labeled ExternalFacingPort and the USB-C port. These Thunderbolt PCIe devices should be labeled as fixed and trusted, as they are built into the computer. Otherwise, security policies that rely on those flags may have unintended results, such as preventing USB-C ports from enumerating. Detect the above scenario through the process of elimination. 1) Integrated Thunderbolt host controllers already have Thunderbolt implemented, so anything outside their external facing Root Port is removable and untrusted. Detect them using the following properties: - Most integrated host controllers have the "usb4-host-interface" ACPI property, as described here: https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#mapping-native-protocols-pcie-displayport-tunneled-through-usb4-to-usb4-host-routers - Integrated Thunderbolt PCIe Root Ports before Alder Lake do not have the "usb4-host-interface" ACPI property. Identify those by their PCI IDs instead. 2) If a Root Port does not have integrated Thunderbolt capabilities, but has the "ExternalFacingPort" ACPI property, that means the manufacturer has opted to use a discrete Thunderbolt host controller that is built into the computer. This host controller can be identified by virtue of being located directly below an external-facing Root Port that lacks integrated Thunderbolt. Label it as trusted and fixed. Everything downstream from it is untrusted and removable. The "ExternalFacingPort" ACPI property is described here: https://learn.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#identifying-externally-exposed-pcie-root-ports Link: https://lore.kernel.org/r/20240910-trust-tbt-fix-v5-1-7a7a42a5f496@chromium.org Suggested-by: Mika Westerberg Signed-off-by: Esther Shimanovich Signed-off-by: Bjorn Helgaas Tested-by: Mika Westerberg Tested-by: Mario Limonciello Reviewed-by: Mika Westerberg Reviewed-by: Mario Limonciello --- arch/x86/pci/acpi.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/probe.c | 30 +++++++++---- include/linux/pci.h | 6 +++ 3 files changed, 148 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 55c4b07ec1f6..0c316bae1726 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -250,6 +250,125 @@ void __init pci_acpi_crs_quirks(void) pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } +/* + * Check if pdev is part of a PCIe switch that is directly below the + * specified bridge. + */ +static bool pcie_switch_directly_under(struct pci_dev *bridge, + struct pci_dev *pdev) +{ + struct pci_dev *parent = pci_upstream_bridge(pdev); + + /* If the device doesn't have a parent, it's not under anything */ + if (!parent) + return false; + + /* + * If the device has a PCIe type, check if it is below the + * corresponding PCIe switch components (if applicable). Then check + * if its upstream port is directly beneath the specified bridge. + */ + switch (pci_pcie_type(pdev)) { + case PCI_EXP_TYPE_UPSTREAM: + return parent == bridge; + + case PCI_EXP_TYPE_DOWNSTREAM: + if (pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM) + return false; + parent = pci_upstream_bridge(parent); + return parent == bridge; + + case PCI_EXP_TYPE_ENDPOINT: + if (pci_pcie_type(parent) != PCI_EXP_TYPE_DOWNSTREAM) + return false; + parent = pci_upstream_bridge(parent); + if (!parent || pci_pcie_type(parent) != PCI_EXP_TYPE_UPSTREAM) + return false; + parent = pci_upstream_bridge(parent); + return parent == bridge; + } + + return false; +} + +static bool pcie_has_usb4_host_interface(struct pci_dev *pdev) +{ + struct fwnode_handle *fwnode; + + /* + * For USB4, the tunneled PCIe Root or Downstream Ports are marked + * with the "usb4-host-interface" ACPI property, so we look for + * that first. This should cover most cases. + */ + fwnode = fwnode_find_reference(dev_fwnode(&pdev->dev), + "usb4-host-interface", 0); + if (!IS_ERR(fwnode)) { + fwnode_handle_put(fwnode); + return true; + } + + /* + * Any integrated Thunderbolt 3/4 PCIe Root Ports from Intel + * before Alder Lake do not have the "usb4-host-interface" + * property so we use their PCI IDs instead. All these are + * tunneled. This list is not expected to grow. + */ + if (pdev->vendor == PCI_VENDOR_ID_INTEL) { + switch (pdev->device) { + /* Ice Lake Thunderbolt 3 PCIe Root Ports */ + case 0x8a1d: + case 0x8a1f: + case 0x8a21: + case 0x8a23: + /* Tiger Lake-LP Thunderbolt 4 PCIe Root Ports */ + case 0x9a23: + case 0x9a25: + case 0x9a27: + case 0x9a29: + /* Tiger Lake-H Thunderbolt 4 PCIe Root Ports */ + case 0x9a2b: + case 0x9a2d: + case 0x9a2f: + case 0x9a31: + return true; + } + } + + return false; +} + +bool arch_pci_dev_is_removable(struct pci_dev *pdev) +{ + struct pci_dev *parent, *root; + + /* pdev without a parent or Root Port is never tunneled */ + parent = pci_upstream_bridge(pdev); + if (!parent) + return false; + root = pcie_find_root_port(pdev); + if (!root) + return false; + + /* Internal PCIe devices are not tunneled */ + if (!root->external_facing) + return false; + + /* Anything directly behind a "usb4-host-interface" is tunneled */ + if (pcie_has_usb4_host_interface(parent)) + return true; + + /* + * Check if this is a discrete Thunderbolt/USB4 controller that is + * directly behind the non-USB4 PCIe Root Port marked as + * "ExternalFacingPort". Those are not behind a PCIe tunnel. + */ + if (pcie_switch_directly_under(root, pdev)) + return false; + + /* PCIe devices after the discrete chip are tunneled */ + return true; +} + #ifdef CONFIG_PCI_MMCONFIG static int check_segment(u16 seg, struct device *dev, char *estr) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..924b23962c45 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1633,23 +1633,33 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) static void set_pcie_untrusted(struct pci_dev *dev) { - struct pci_dev *parent; + struct pci_dev *parent = pci_upstream_bridge(dev); + if (!parent) + return; /* - * If the upstream bridge is untrusted we treat this device + * If the upstream bridge is untrusted we treat this device as * untrusted as well. */ - parent = pci_upstream_bridge(dev); - if (parent && (parent->untrusted || parent->external_facing)) + if (parent->untrusted) { + dev->untrusted = true; + return; + } + + if (arch_pci_dev_is_removable(dev)) { + pci_dbg(dev, "marking as untrusted\n"); dev->untrusted = true; + } } static void pci_set_removable(struct pci_dev *dev) { struct pci_dev *parent = pci_upstream_bridge(dev); + if (!parent) + return; /* - * We (only) consider everything downstream from an external_facing + * We (only) consider everything tunneled below an external_facing * device to be removable by the user. We're mainly concerned with * consumer platforms with user accessible thunderbolt ports that are * vulnerable to DMA attacks, and we expect those ports to be marked by @@ -1659,9 +1669,15 @@ static void pci_set_removable(struct pci_dev *dev) * accessible to user / may not be removed by end user, and thus not * exposed as "removable" to userspace. */ - if (parent && - (parent->external_facing || dev_is_removable(&parent->dev))) + if (dev_is_removable(&parent->dev)) { + dev_set_removable(&dev->dev, DEVICE_REMOVABLE); + return; + } + + if (arch_pci_dev_is_removable(dev)) { + pci_dbg(dev, "marking as removable\n"); dev_set_removable(&dev->dev, DEVICE_REMOVABLE); + } } /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..4e77c4230c0a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2609,6 +2609,12 @@ pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif +#if defined(CONFIG_X86) && defined(CONFIG_ACPI) +bool arch_pci_dev_is_removable(struct pci_dev *pdev); +#else +static inline bool arch_pci_dev_is_removable(struct pci_dev *pdev) { return false; } +#endif + #ifdef CONFIG_EEH static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) { -- cgit v1.2.3 From d2bd39c0456b75be9dfc7d774b8d021355c26ae3 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 18 Oct 2024 17:47:49 +0300 Subject: PCI: Store all PCIe Supported Link Speeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe bandwidth controller added by a subsequent commit will require selecting PCIe Link Speeds that are lower than the Maximum Link Speed. The struct pci_bus only stores max_bus_speed. Even if PCIe r6.1 sec 8.2.1 currently disallows gaps in supported Link Speeds, the Implementation Note in PCIe r6.1 sec 7.5.3.18, recommends determining supported Link Speeds using the Supported Link Speeds Vector in the Link Capabilities 2 Register (when available) to "avoid software being confused if a future specification defines Links that do not require support for all slower speeds." Reuse code in pcie_get_speed_cap() to add pcie_get_supported_speeds() to query the Supported Link Speeds Vector of a PCIe device. The value is taken directly from the Supported Link Speeds Vector or synthesized from the Max Link Speed in the Link Capabilities Register when the Link Capabilities 2 Register is not available. The Supported Link Speeds Vector in the Link Capabilities Register 2 corresponds to the bus below on Root Ports and Downstream Ports, whereas it corresponds to the bus above on Upstream Ports and Endpoints (PCIe r6.1 sec 7.5.3.18): Supported Link Speeds Vector - This field indicates the supported Link speed(s) of the associated Port. Add supported_speeds into the struct pci_dev that caches the Supported Link Speeds Vector. supported_speeds contains a set of Link Speeds only in the case where PCIe Link Speed can be determined. Root Complex Integrated Endpoints do not have a well-defined Link Speed because they do not implement either of the Link Capabilities Registers, which is allowed by PCIe r6.1 sec 7.5.3 (the same limitation applies to determining cur_bus_speed and max_bus_speed that are PCI_SPEED_UNKNOWN in such case). This is of no concern from PCIe bandwidth controller point of view because such devices are not attached into a PCIe Root Port that could be controlled. The supported_speeds field keeps the extra reserved zero at the least significant bit to match the Link Capabilities 2 Register layout. An attempt was made to store supported_speeds field into the struct pci_bus as an intersection of both ends of the Link, however, the subordinate struct pci_bus is not available early enough. The Target Speed quirk (in pcie_failed_link_retrain()) can run either during initial scan or later, requiring it to use the API provided by the PCIe bandwidth controller to set the Target Link Speed in order to co-exist with the bandwidth controller. When the Target Speed quirk is calling the bandwidth controller during initial scan, the struct pci_bus is not yet initialized. As such, storing supported_speeds into the struct pci_bus is not viable. Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/20241018144755.7875-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: move pcie_get_supported_speeds() decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.c | 58 +++++++++++++++++++++++++++++++------------ drivers/pci/pci.h | 1 + drivers/pci/probe.c | 3 +++ include/linux/pci.h | 10 +++++++- include/uapi/linux/pci_regs.h | 1 + 5 files changed, 56 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..3d67e8b50ba2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6189,38 +6189,64 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, EXPORT_SYMBOL(pcie_bandwidth_available); /** - * pcie_get_speed_cap - query for the PCI device's link speed capability + * pcie_get_supported_speeds - query Supported Link Speed Vector * @dev: PCI device to query * - * Query the PCI device speed capability. Return the maximum link speed - * supported by the device. + * Query @dev supported link speeds. + * + * Implementation Note in PCIe r6.0 sec 7.5.3.18 recommends determining + * supported link speeds using the Supported Link Speeds Vector in the Link + * Capabilities 2 Register (when available). + * + * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18. + * + * Without Link Capabilities 2, i.e., prior to PCIe r3.0, Supported Link + * Speeds field in Link Capabilities is used and only 2.5 GT/s and 5.0 GT/s + * speeds were defined. + * + * For @dev without Supported Link Speed Vector, the field is synthesized + * from the Max Link Speed field in the Link Capabilities Register. + * + * Return: Supported Link Speeds Vector (+ reserved 0 at LSB). */ -enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) +u8 pcie_get_supported_speeds(struct pci_dev *dev) { u32 lnkcap2, lnkcap; + u8 speeds; /* - * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18. The - * implementation note there recommends using the Supported Link - * Speeds Vector in Link Capabilities 2 when supported. - * - * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software - * should use the Supported Link Speeds field in Link Capabilities, - * where only 2.5 GT/s and 5.0 GT/s speeds were defined. + * Speeds retain the reserved 0 at LSB before PCIe Supported Link + * Speeds Vector to allow using SLS Vector bit defines directly. */ pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); + speeds = lnkcap2 & PCI_EXP_LNKCAP2_SLS; /* PCIe r3.0-compliant */ - if (lnkcap2) - return PCIE_LNKCAP2_SLS2SPEED(lnkcap2); + if (speeds) + return speeds; pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); + + /* Synthesize from the Max Link Speed field */ if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB) - return PCIE_SPEED_5_0GT; + speeds = PCI_EXP_LNKCAP2_SLS_5_0GB | PCI_EXP_LNKCAP2_SLS_2_5GB; else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB) - return PCIE_SPEED_2_5GT; + speeds = PCI_EXP_LNKCAP2_SLS_2_5GB; - return PCI_SPEED_UNKNOWN; + return speeds; +} + +/** + * pcie_get_speed_cap - query for the PCI device's link speed capability + * @dev: PCI device to query + * + * Query the PCI device speed capability. + * + * Return: the maximum link speed supported by the device. + */ +enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) +{ + return PCIE_LNKCAP2_SLS2SPEED(dev->supported_speeds); } EXPORT_SYMBOL(pcie_get_speed_cap); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..d0a46ecf7289 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -373,6 +373,7 @@ static inline int pcie_dev_speed_mbps(enum pci_bus_speed speed) return -EINVAL; } +u8 pcie_get_supported_speeds(struct pci_dev *dev); const char *pci_speed_string(enum pci_bus_speed speed); enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..af153a8e8225 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1947,6 +1947,9 @@ int pci_setup_device(struct pci_dev *dev) set_pcie_untrusted(dev); + if (pci_is_pcie(dev)) + dev->supported_speeds = pcie_get_supported_speeds(dev); + /* "Unknown power state" */ dev->current_state = PCI_UNKNOWN; diff --git a/include/linux/pci.h b/include/linux/pci.h index be5ed534c39c..99c6fa30d25b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -318,7 +318,14 @@ struct pci_sriov; struct pci_p2pdma; struct rcec_ea; -/* The pci_dev structure describes PCI devices */ +/* struct pci_dev - describes a PCI device + * + * @supported_speeds: PCIe Supported Link Speeds Vector (+ reserved 0 at + * LSB). 0 when the supported speeds cannot be + * determined (e.g., for Root Complex Integrated + * Endpoints without the relevant Capability + * Registers). + */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ struct pci_bus *bus; /* Bus this device is on */ @@ -522,6 +529,7 @@ struct pci_dev { struct npem *npem; /* Native PCIe Enclosure Management */ #endif u16 acs_cap; /* ACS Capability offset */ + u8 supported_speeds; /* Supported Link Speeds Vector */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ /* diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..f3c9de0a497c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -678,6 +678,7 @@ #define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ #define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ +#define PCI_EXP_LNKCAP2_SLS 0x000000fe /* Supported Link Speeds Vector */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ -- cgit v1.2.3 From 38a18dfe9035d5a02a53271824de1854129c61dc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Oct 2024 15:48:51 -0700 Subject: PCI: Unexport pci_walk_bus_locked() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's only one user of pci_walk_bus_locked(), and it's internal to the PCI core. Unexport it and make it private to drivers/pci/. Link: https://lore.kernel.org/r/20241022224851.340648-6-kbusch@meta.com Signed-off-by: Keith Busch [bhelgaas: move decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Reviewed-by: Ilpo Järvinen --- drivers/pci/bus.c | 1 - drivers/pci/pci.h | 3 +++ include/linux/pci.h | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 8491e9c7f058..30620f3bb0e2 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -435,7 +435,6 @@ void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void * __pci_walk_bus(top, cb, userdata); } -EXPORT_SYMBOL_GPL(pci_walk_bus_locked); struct pci_bus *pci_bus_get(struct pci_bus *bus) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 187c5c83412d..28cccfeb8076 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -323,6 +323,9 @@ void __pci_bus_assign_resources(const struct pci_bus *bus, struct list_head *realloc_head, struct list_head *fail_head); bool pci_bus_clip_resource(struct pci_dev *dev, int idx); +void pci_walk_bus_locked(struct pci_bus *top, + int (*cb)(struct pci_dev *, void *), + void *userdata); const char *pci_resource_name(struct pci_dev *dev, unsigned int i); diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..056290377273 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1623,8 +1623,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); -void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); -- cgit v1.2.3 From 665745f274870c921020f610e2c99a3b1613519b Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 18 Oct 2024 17:47:52 +0300 Subject: PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mostly reverts the commit b4c7d2076b4e ("PCI/LINK: Remove bandwidth notification"). An upcoming commit extends this driver building PCIe bandwidth controller on top of it. PCIe bandwidth notifications were first added in the commit e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") but later had to be removed. The significant changes compared with the old bandwidth notification driver include: 1) Don't print the notifications into kernel log, just keep the Link Speed cached in struct pci_bus updated. While somewhat unfortunate, the log spam was the source of complaints that eventually lead to the removal of the bandwidth notifications driver (see the links below for further information). 2) Besides the Link Bandwidth Management Interrupt, also enable Link Autonomous Bandwidth Interrupt to cover the other source of bandwidth changes. 3) Handle Link Speed updates robustly. Refresh the cached Link Speed when enabling Bandwidth Notification Interrupts, and solve the race between Link Speed read and LBMS/LABS update in pcie_bwnotif_irq_thread(). 4) Use concurrency safe LNKCTL RMW operations. 5) The driver is now called PCIe bwctrl (bandwidth controller) instead of just bandwidth notifications because of increased scope and functionality within the driver. 6) Coexist with the Target Link Speed quirk in pcie_failed_link_retrain(). Provide LBMS counting API for it. 7) Tweaks to variable/functions names for consistency and length reasons. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track PCIe Link Speed changes. [bhelgaas: This is based on previous work by Alexandru Gagniuc ; see e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification")] Link: https://lore.kernel.org/r/20241018144755.7875-7-ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/all/20190429185611.121751-1-helgaas@kernel.org/ Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com/ Link: https://lore.kernel.org/linux-pci/20200115221008.GA191037@google.com/ Suggested-by: Lukas Wunner # Building bwctrl on top of bwnotif Signed-off-by: Ilpo Järvinen [bhelgaas: squash fix to drop IRQF_ONESHOT and convert to hardirq handler: https://lore.kernel.org/r/20241115165717.15233-1-ilpo.jarvinen@linux.intel.com] Signed-off-by: Bjorn Helgaas Tested-by: Stefan Wahren Reviewed-by: Jonathan Cameron --- MAINTAINERS | 6 ++ drivers/pci/hotplug/pciehp_ctrl.c | 5 + drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 11 +++ drivers/pci/pcie/Makefile | 2 +- drivers/pci/pcie/bwctrl.c | 186 ++++++++++++++++++++++++++++++++++++++ drivers/pci/pcie/portdrv.c | 9 +- drivers/pci/pcie/portdrv.h | 6 +- drivers/pci/quirks.c | 9 +- include/linux/pci.h | 2 + 10 files changed, 229 insertions(+), 9 deletions(-) create mode 100644 drivers/pci/pcie/bwctrl.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..8c555b3325d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17933,6 +17933,12 @@ F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* +PCIE BANDWIDTH CONTROLLER +M: Ilpo Järvinen +L: linux-pci@vger.kernel.org +S: Supported +F: drivers/pci/pcie/bwctrl.c + PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron L: linux-pci@vger.kernel.org diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index dcdbfcf404dd..d603a7aa7483 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -19,6 +19,8 @@ #include #include #include + +#include "../pci.h" #include "pciehp.h" /* The following routines constitute the bulk of the @@ -127,6 +129,9 @@ static void remove_board(struct controller *ctrl, bool safe_removal) pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, INDICATOR_NOOP); + + /* Don't carry LBMS indications across */ + pcie_reset_lbms_count(ctrl->pcie->port); } static int pciehp_enable_slot(struct controller *ctrl); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3d67e8b50ba2..f85f380cdb9b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4740,7 +4740,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) * to track link speed or width changes made by hardware itself * in attempt to correct unreliable link operation. */ - pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); + pcie_reset_lbms_count(pdev); return rc; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 8137ad2fead8..8ec189c8f3f5 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -698,6 +698,17 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif +#ifdef CONFIG_PCIEPORTBUS +void pcie_reset_lbms_count(struct pci_dev *port); +int pcie_lbms_count(struct pci_dev *port, unsigned long *val); +#else +static inline void pcie_reset_lbms_count(struct pci_dev *port) {} +static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val) +{ + return -EOPNOTSUPP; +} +#endif + struct pci_dev_reset_methods { u16 vendor; u16 device; diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 6461aa93fe76..53ccab62314d 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -4,7 +4,7 @@ pcieportdrv-y := portdrv.o rcec.o -obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o +obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c new file mode 100644 index 000000000000..b3da24900271 --- /dev/null +++ b/drivers/pci/pcie/bwctrl.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PCIe bandwidth controller + * + * Author: Alexandru Gagniuc + * + * Copyright (C) 2019 Dell Inc + * Copyright (C) 2023-2024 Intel Corporation + * + * This service port driver hooks into the Bandwidth Notification interrupt + * watching for changes or links becoming degraded in operation. It updates + * the cached Current Link Speed that is exposed to user space through sysfs. + */ + +#define dev_fmt(fmt) "bwctrl: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pci.h" +#include "portdrv.h" + +/** + * struct pcie_bwctrl_data - PCIe bandwidth controller + * @lbms_count: Count for LBMS (since last reset) + */ +struct pcie_bwctrl_data { + atomic_t lbms_count; +}; + +/* Prevents port removal during LBMS count accessors */ +static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); + +static void pcie_bwnotif_enable(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status; + int ret; + + /* Count LBMS seen so far as one */ + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_set_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + + /* + * Update after enabling notifications & clearing status bits ensures + * link speed is up to date. + */ + pcie_update_link_speed(port->subordinate); +} + +static void pcie_bwnotif_disable(struct pci_dev *port) +{ + pcie_capability_clear_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); +} + +static irqreturn_t pcie_bwnotif_irq(int irq, void *context) +{ + struct pcie_device *srv = context; + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status, events; + int ret; + + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret != PCIBIOS_SUCCESSFUL) + return IRQ_NONE; + + events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + if (!events) + return IRQ_NONE; + + if (events & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); + + /* + * Interrupts will not be triggered from any further Link Speed + * change until LBMS is cleared by the write. Therefore, re-read the + * speed (inside pcie_update_link_speed()) after LBMS has been + * cleared to avoid missing link speed changes. + */ + pcie_update_link_speed(port->subordinate); + + return IRQ_HANDLED; +} + +void pcie_reset_lbms_count(struct pci_dev *port) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (data) + atomic_set(&data->lbms_count, 0); + else + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS); +} + +int pcie_lbms_count(struct pci_dev *port, unsigned long *val) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (!data) + return -ENOTTY; + + *val = atomic_read(&data->lbms_count); + + return 0; +} + +static int pcie_bwnotif_probe(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + int ret; + + struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, + sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) + return ret; + + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + port->link_bwctrl = no_free_ptr(data); + pcie_bwnotif_enable(srv); + } + + pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + + return 0; +} + +static void pcie_bwnotif_remove(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) + srv->port->link_bwctrl = NULL; +} + +static int pcie_bwnotif_suspend(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + return 0; +} + +static int pcie_bwnotif_resume(struct pcie_device *srv) +{ + pcie_bwnotif_enable(srv); + return 0; +} + +static struct pcie_port_service_driver pcie_bwctrl_driver = { + .name = "pcie_bwctrl", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_BWCTRL, + .probe = pcie_bwnotif_probe, + .suspend = pcie_bwnotif_suspend, + .resume = pcie_bwnotif_resume, + .remove = pcie_bwnotif_remove, +}; + +int __init pcie_bwctrl_init(void) +{ + return pcie_port_service_register(&pcie_bwctrl_driver); +} diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index 6af5e0425872..5e10306b6308 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -68,7 +68,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); *pme = FIELD_GET(PCI_EXP_FLAGS_IRQ, reg16); nvec = *pme + 1; @@ -150,11 +150,11 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask) /* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_irq = pci_irq_vector(dev, pme); irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq; irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq; - irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq; + irqs[PCIE_PORT_SERVICE_BWCTRL_SHIFT] = pcie_irq; } if (mask & PCIE_PORT_SERVICE_AER) @@ -271,7 +271,7 @@ static int get_port_device_capability(struct pci_dev *dev) pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap); if (linkcap & PCI_EXP_LNKCAP_LBNC) - services |= PCIE_PORT_SERVICE_BWNOTIF; + services |= PCIE_PORT_SERVICE_BWCTRL; } return services; @@ -828,6 +828,7 @@ static void __init pcie_init_services(void) pcie_aer_init(); pcie_pme_init(); pcie_dpc_init(); + pcie_bwctrl_init(); pcie_hp_init(); } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 12c89ea0313b..bd29d1cc7b8b 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -20,8 +20,8 @@ #define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) #define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */ #define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) -#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */ -#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT) +#define PCIE_PORT_SERVICE_BWCTRL_SHIFT 4 /* Bandwidth Controller (notifications) */ +#define PCIE_PORT_SERVICE_BWCTRL (1 << PCIE_PORT_SERVICE_BWCTRL_SHIFT) #define PCIE_PORT_DEVICE_MAXSERVICES 5 @@ -51,6 +51,8 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +int pcie_bwctrl_init(void); + /* Port Type */ #define PCIE_ANY_PORT (~0) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a560ea403b8e..e6d502dca939 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -35,7 +35,14 @@ static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta) { - return lnksta & PCI_EXP_LNKSTA_LBMS; + unsigned long count; + int ret; + + ret = pcie_lbms_count(dev, &count); + if (ret < 0) + return lnksta & PCI_EXP_LNKSTA_LBMS; + + return count > 0; } /* diff --git a/include/linux/pci.h b/include/linux/pci.h index 99c6fa30d25b..579b28833429 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -313,6 +313,7 @@ struct pci_vpd { }; struct irq_affinity; +struct pcie_bwctrl_data; struct pcie_link_state; struct pci_sriov; struct pci_p2pdma; @@ -502,6 +503,7 @@ struct pci_dev { unsigned int dpc_rp_extensions:1; u8 dpc_rp_log_size; #endif + struct pcie_bwctrl_data *link_bwctrl; #ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* PF: SR-IOV info */ -- cgit v1.2.3 From de9a6c8d5dbfedb5eb3722c822da0490f6a59a45 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 18 Oct 2024 17:47:53 +0300 Subject: PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, PCIe Link Speeds are adjusted by custom code rather than in a common function provided in PCI core. The PCIe bandwidth controller (bwctrl) introduces an in-kernel API, pcie_set_target_speed(), to set PCIe Link Speed. Convert Target Speed quirk to use the new API. The Target Speed quirk runs very early when bwctrl is not yet probed for a Port and can also run later when bwctrl is already setup for the Port, which requires the per port mutex (set_speed_mutex) to be only taken if the bwctrl setup is already complete. The new API is also intended to be used in an upcoming commit that adds a thermal cooling device to throttle PCIe bandwidth when thermal thresholds are reached. The PCIe bandwidth control procedure is as follows. The highest speed supported by the Port and the PCIe device which is not higher than the requested speed is selected and written into the Target Link Speed in the Link Control 2 Register. Then bandwidth controller retrains the PCIe Link. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track PCIe Link Speed changes. While Bandwidth Notifications should also be generated when bandwidth controller alters the PCIe Link Speed, a few platforms do not deliver LMBS interrupt after Link Training as expected. Thus, after changing the Link Speed, bandwidth controller makes additional read for the Link Status Register to ensure cur_bus_speed is consistent with the new PCIe Link Speed. Link: https://lore.kernel.org/r/20241018144755.7875-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: squash devm_mutex_init() error checking from https://lore.kernel.org/r/20241030163139.2111689-1-andriy.shevchenko@linux.intel.com, drop export of pcie_set_target_speed()] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.h | 20 ++++++ drivers/pci/pcie/bwctrl.c | 180 ++++++++++++++++++++++++++++++++++++++++++++-- drivers/pci/quirks.c | 17 ++--- include/linux/pci.h | 10 +++ 4 files changed, 208 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 8ec189c8f3f5..7573f81f58c4 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -331,6 +331,17 @@ void pci_disable_bridge_window(struct pci_dev *dev); struct pci_bus *pci_bus_get(struct pci_bus *bus); void pci_bus_put(struct pci_bus *bus); +#define PCIE_LNKCAP_SLS2SPEED(lnkcap) \ +({ \ + ((lnkcap) == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ + (lnkcap) == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ + PCI_SPEED_UNKNOWN); \ +}) + /* PCIe link information from Link Capabilities 2 */ #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \ ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ @@ -341,6 +352,15 @@ void pci_bus_put(struct pci_bus *bus); (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN) +#define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \ + ((lnkctl2) == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ + (lnkctl2) == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \ + PCI_SPEED_UNKNOWN) + /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \ diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index b3da24900271..3cd3e2d066c9 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -7,6 +7,11 @@ * Copyright (C) 2019 Dell Inc * Copyright (C) 2023-2024 Intel Corporation * + * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds + * and notify the operating system when the Link Width or Speed changes. The + * notification capability is required for all Root Ports and Downstream + * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. + * * This service port driver hooks into the Bandwidth Notification interrupt * watching for changes or links becoming degraded in operation. It updates * the cached Current Link Speed that is exposed to user space through sysfs. @@ -15,9 +20,12 @@ #define dev_fmt(fmt) "bwctrl: " fmt #include +#include +#include #include #include #include +#include #include #include #include @@ -28,14 +36,166 @@ /** * struct pcie_bwctrl_data - PCIe bandwidth controller + * @set_speed_mutex: Serializes link speed changes * @lbms_count: Count for LBMS (since last reset) */ struct pcie_bwctrl_data { + struct mutex set_speed_mutex; atomic_t lbms_count; }; -/* Prevents port removal during LBMS count accessors */ +/* + * Prevent port removal during LBMS count accessors and Link Speed changes. + * + * These have to be differentiated because pcie_bwctrl_change_speed() calls + * pcie_retrain_link() which uses LBMS count reset accessor on success + * (using just one rwsem triggers "possible recursive locking detected" + * warning). + */ static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); +static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); + +static bool pcie_valid_speed(enum pci_bus_speed speed) +{ + return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); +} + +static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) +{ + static const u8 speed_conv[] = { + [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, + [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, + [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, + [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, + [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, + [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, + }; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed))) + return 0; + + return speed_conv[speed]; +} + +static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) +{ + return __fls(supported_speeds); +} + +/** + * pcie_bwctrl_select_speed - Select Target Link Speed + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * + * Select Target Link Speed by take into account Supported Link Speeds of + * both the Root Port and the Endpoint. + * + * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) + */ +static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) +{ + struct pci_bus *bus = port->subordinate; + u8 desired_speeds, supported_speeds; + struct pci_dev *dev; + + desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), + __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); + + supported_speeds = port->supported_speeds; + if (bus) { + down_read(&pci_bus_sem); + dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); + if (dev) + supported_speeds &= dev->supported_speeds; + up_read(&pci_bus_sem); + } + if (!supported_speeds) + return PCI_EXP_LNKCAP2_SLS_2_5GB; + + return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); +} + +static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) +{ + int ret; + + ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, target_speed); + if (ret != PCIBIOS_SUCCESSFUL) + return pcibios_err_to_errno(ret); + + ret = pcie_retrain_link(port, use_lt); + if (ret < 0) + return ret; + + /* + * Ensure link speed updates also with platforms that have problems + * with notifications. + */ + if (port->subordinate) + pcie_update_link_speed(port->subordinate); + + return 0; +} + +/** + * pcie_set_target_speed - Set downstream Link Speed for PCIe Port + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training + * + * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be + * adjusted downwards to the best speed supported by both the Port and PCIe + * Device underneath it. + * + * Return: + * * 0 - on success + * * -EINVAL - @speed_req is not a PCIe Link Speed + * * -ENODEV - @port is not controllable + * * -ETIMEDOUT - changing Link Speed took too long + * * -EAGAIN - Link Speed was changed but @speed_req was not achieved + */ +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt) +{ + struct pci_bus *bus = port->subordinate; + u16 target_speed; + int ret; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) + return -EINVAL; + + if (bus && bus->cur_bus_speed == speed_req) + return 0; + + target_speed = pcie_bwctrl_select_speed(port, speed_req); + + scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { + struct pcie_bwctrl_data *data = port->link_bwctrl; + + /* + * port->link_bwctrl is NULL during initial scan when called + * e.g. from the Target Speed quirk. + */ + if (data) + mutex_lock(&data->set_speed_mutex); + + ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); + + if (data) + mutex_unlock(&data->set_speed_mutex); + } + + /* + * Despite setting higher speed into the Target Link Speed, empty + * bus won't train to 5GT+ speeds. + */ + if (!ret && bus && bus->cur_bus_speed != speed_req && + !list_empty(&bus->devices)) + ret = -EAGAIN; + + return ret; +} static void pcie_bwnotif_enable(struct pcie_device *srv) { @@ -136,14 +296,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) if (!data) return -ENOMEM; + ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); + if (ret) + return ret; + ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq, IRQF_SHARED, "PCIe bwctrl", srv); if (ret) return ret; - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { - port->link_bwctrl = no_free_ptr(data); - pcie_bwnotif_enable(srv); + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + port->link_bwctrl = no_free_ptr(data); + pcie_bwnotif_enable(srv); + } } pci_dbg(port, "enabled with IRQ %d\n", srv->irq); @@ -154,8 +320,10 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) static void pcie_bwnotif_remove(struct pcie_device *srv) { pcie_bwnotif_disable(srv->port); - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) - srv->port->link_bwctrl = NULL; + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) + srv->port->link_bwctrl = NULL; } static int pcie_bwnotif_suspend(struct pcie_device *srv) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e6d502dca939..dcf1c86a5488 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -113,16 +113,11 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n"); - lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; - lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT; - pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - - ret = pcie_retrain_link(dev, false); + ret = pcie_set_target_speed(dev, PCIE_SPEED_2_5GT, false); if (ret) { pci_info(dev, "retraining failed\n"); - pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, - oldlnkctl2); - pcie_retrain_link(dev, true); + pcie_set_target_speed(dev, PCIE_LNKCTL2_TLS2SPEED(oldlnkctl2), + true); return ret; } @@ -136,11 +131,7 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n"); pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); - lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS; - lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS; - pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2); - - ret = pcie_retrain_link(dev, false); + ret = pcie_set_target_speed(dev, PCIE_LNKCAP_SLS2SPEED(lnkcap), false); if (ret) { pci_info(dev, "retraining failed\n"); return ret; diff --git a/include/linux/pci.h b/include/linux/pci.h index 579b28833429..4f02d9f91399 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1797,9 +1797,19 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d, #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_native; + +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt); #else #define pcie_ports_disabled true #define pcie_ports_native false + +static inline int pcie_set_target_speed(struct pci_dev *port, + enum pci_bus_speed speed_req, + bool use_lt) +{ + return -EOPNOTSUPP; +} #endif #define PCIE_LINK_STATE_L0S (BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */ -- cgit v1.2.3 From d278b098282d1327f6e1be82aacb18457a4d244d Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 18 Oct 2024 17:47:54 +0300 Subject: thermal: Add PCIe cooling driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a thermal cooling driver to provide path to access PCIe bandwidth controller using the usual thermal interfaces. A cooling device is instantiated for controllable PCIe Ports from the bwctrl service driver. If registering the cooling device fails, allow bwctrl's probe to succeed regardless. As cdev in that case contains IS_ERR() pseudo "pointer", clean that up inside the probe function so the remove side doesn't need to suddenly make an odd looking IS_ERR() check. The thermal side state 0 means no throttling, i.e., maximum supported PCIe Link Speed. Link: https://lore.kernel.org/r/20241018144755.7875-9-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: dropped data->cdev test per https://lore.kernel.org/r/ZzRm1SJTwEMRsAr8@wunner.de] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Acked-by: Rafael J. Wysocki # From the cooling device interface perspective --- MAINTAINERS | 2 ++ drivers/pci/pcie/bwctrl.c | 12 +++++++ drivers/thermal/Kconfig | 9 +++++ drivers/thermal/Makefile | 2 ++ drivers/thermal/pcie_cooling.c | 80 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pci-bwctrl.h | 28 +++++++++++++++ 6 files changed, 133 insertions(+) create mode 100644 drivers/thermal/pcie_cooling.c create mode 100644 include/linux/pci-bwctrl.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 8c555b3325d6..393ed7ce5ea1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17938,6 +17938,8 @@ M: Ilpo Järvinen L: linux-pci@vger.kernel.org S: Supported F: drivers/pci/pcie/bwctrl.c +F: drivers/thermal/pcie_cooling.c +F: include/linux/pci-bwctrl.h PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index 3cd3e2d066c9..b59cacc740fa 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -38,10 +39,12 @@ * struct pcie_bwctrl_data - PCIe bandwidth controller * @set_speed_mutex: Serializes link speed changes * @lbms_count: Count for LBMS (since last reset) + * @cdev: Thermal cooling device associated with the port */ struct pcie_bwctrl_data { struct mutex set_speed_mutex; atomic_t lbms_count; + struct thermal_cooling_device *cdev; }; /* @@ -314,11 +317,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ + port->link_bwctrl->cdev = pcie_cooling_device_register(port); + if (IS_ERR(port->link_bwctrl->cdev)) + port->link_bwctrl->cdev = NULL; + return 0; } static void pcie_bwnotif_remove(struct pcie_device *srv) { + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + + pcie_cooling_device_unregister(data->cdev); + pcie_bwnotif_disable(srv->port); scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 61e7ae524b1f..d3f9686e26e7 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -220,6 +220,15 @@ config DEVFREQ_THERMAL If you want this support, you should say Y here. +config PCIE_THERMAL + bool "PCIe cooling support" + depends on PCIEPORTBUS + help + This implements PCIe cooling mechanism through bandwidth reduction + for PCIe devices. + + If you want this support, you should say Y here. + config THERMAL_EMULATION bool "Thermal emulation mode support" help diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 41c4d56beb40..210c16c91461 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -31,6 +31,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL) += cpuidle_cooling.o # devfreq cooling thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o +thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o + obj-$(CONFIG_K3_THERMAL) += k3_bandgap.o k3_j72xx_bandgap.o # platform thermal drivers obj-y += broadcom/ diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c new file mode 100644 index 000000000000..a876d64f1582 --- /dev/null +++ b/drivers/thermal/pcie_cooling.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PCIe cooling device + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define COOLING_DEV_TYPE_PREFIX "PCIe_Port_Link_Speed_" + +static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct pci_dev *port = cdev->devdata; + + /* cooling state 0 is same as the maximum PCIe speed */ + *state = port->subordinate->max_bus_speed - PCIE_SPEED_2_5GT; + + return 0; +} + +static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct pci_dev *port = cdev->devdata; + + /* cooling state 0 is same as the maximum PCIe speed */ + *state = cdev->max_state - (port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT); + + return 0; +} + +static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state) +{ + struct pci_dev *port = cdev->devdata; + enum pci_bus_speed speed; + + /* cooling state 0 is same as the maximum PCIe speed */ + speed = (cdev->max_state - state) + PCIE_SPEED_2_5GT; + + return pcie_set_target_speed(port, speed, true); +} + +static struct thermal_cooling_device_ops pcie_cooling_ops = { + .get_max_state = pcie_cooling_get_max_level, + .get_cur_state = pcie_cooling_get_cur_level, + .set_cur_state = pcie_cooling_set_cur_level, +}; + +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + char *name __free(kfree) = + kasprintf(GFP_KERNEL, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port)); + if (!name) + return ERR_PTR(-ENOMEM); + + return thermal_cooling_device_register(name, port, &pcie_cooling_ops); +} + +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ + thermal_cooling_device_unregister(cdev); +} + +/* For bus_speed <-> state arithmetic */ +static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT); +static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT); +static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT); +static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT); +static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT); + +MODULE_AUTHOR("Ilpo Järvinen "); +MODULE_DESCRIPTION("PCIe cooling driver"); diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h new file mode 100644 index 000000000000..cee07127455b --- /dev/null +++ b/include/linux/pci-bwctrl.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PCIe bandwidth controller + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#ifndef LINUX_PCI_BWCTRL_H +#define LINUX_PCI_BWCTRL_H + +#include + +struct thermal_cooling_device; + +#ifdef CONFIG_PCIE_THERMAL +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port); +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev); +#else +static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + return NULL; +} +static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ +} +#endif + +#endif -- cgit v1.2.3 From 28b6acd75e3cefbe746ec7402c7ff4fdb114f327 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Thu, 14 Nov 2024 17:10:32 +0100 Subject: PCI: endpoint: Fix pci_epc_map map_size kerneldoc string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region, the map PCI start address is not necessarily the desired PCI base address to be mapped. This can result in map_pci_addr being lower than pci_addr as documented. This results in map_size covering the range map_pci_addr..pci_addr+pci_size. The old text had the pci_addr twice instead of map_pci_addr..pci_addr, so replace the erroneous kerneldoc string to reflect the actual range. Link: https://lore.kernel.org/r/20241114161032.3046202-1-rick.wertenbroek@gmail.com Signed-off-by: Rick Wertenbroek Signed-off-by: Krzysztof Wilczyński --- include/linux/pci-epc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index de8cc3658220..e818e3fdcded 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -40,7 +40,7 @@ pci_epc_interface_string(enum pci_epc_interface_type type) * @map_pci_addr: RC PCI address used as the first address mapped (may be lower * than @pci_addr) * @map_size: size of the controller memory needed for mapping the RC PCI address - * range @pci_addr..@pci_addr+@pci_size + * range @map_pci_addr..@pci_addr+@pci_size * @phys_base: base physical address of the allocated EPC memory for mapping the * RC PCI address range * @phys_addr: physical address at which @pci_addr is mapped -- cgit v1.2.3 From b88cbaaa6fa109c2eb455d8fe2a318de0d197ea2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Nov 2024 15:44:27 -0600 Subject: PCI/pwrctrl: Rename pwrctl files to pwrctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To slightly reduce confusion between "pwrctl" (the power controller and power sequencing framework) and "bwctrl" (the bandwidth controller), rename "pwrctl" to "pwrctrl" so they use the same "ctrl" suffix. Rename drivers/pci/pwrctl/ to drivers/pci/pwrctrl/, including the related MAINTAINERS, include file (include/linux/pci-pwrctl.h), Makefile, and Kconfig changes. This is the minimal rename of files only. A subsequent commit will rename functions and data structures. Link: https://lore.kernel.org/r/20241115214428.2061153-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Acked-by: Krzysztof Wilczyński --- MAINTAINERS | 4 +- drivers/pci/Kconfig | 2 +- drivers/pci/Makefile | 2 +- drivers/pci/pwrctl/Kconfig | 12 --- drivers/pci/pwrctl/Makefile | 6 -- drivers/pci/pwrctl/core.c | 147 ------------------------------- drivers/pci/pwrctl/pci-pwrctl-pwrseq.c | 94 -------------------- drivers/pci/pwrctrl/Kconfig | 12 +++ drivers/pci/pwrctrl/Makefile | 6 ++ drivers/pci/pwrctrl/core.c | 147 +++++++++++++++++++++++++++++++ drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c | 94 ++++++++++++++++++++ include/linux/pci-pwrctl.h | 54 ------------ include/linux/pci-pwrctrl.h | 54 ++++++++++++ 13 files changed, 317 insertions(+), 317 deletions(-) delete mode 100644 drivers/pci/pwrctl/Kconfig delete mode 100644 drivers/pci/pwrctl/Makefile delete mode 100644 drivers/pci/pwrctl/core.c delete mode 100644 drivers/pci/pwrctl/pci-pwrctl-pwrseq.c create mode 100644 drivers/pci/pwrctrl/Kconfig create mode 100644 drivers/pci/pwrctrl/Makefile create mode 100644 drivers/pci/pwrctrl/core.c create mode 100644 drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c delete mode 100644 include/linux/pci-pwrctl.h create mode 100644 include/linux/pci-pwrctrl.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..1a6096135424 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17910,8 +17910,8 @@ M: Bartosz Golaszewski L: linux-pci@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git -F: drivers/pci/pwrctl/* -F: include/linux/pci-pwrctl.h +F: drivers/pci/pwrctrl/* +F: include/linux/pci-pwrctrl.h PCI SUBSYSTEM M: Bjorn Helgaas diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 0d94e4a967d8..e1c025698a28 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -305,6 +305,6 @@ source "drivers/pci/hotplug/Kconfig" source "drivers/pci/controller/Kconfig" source "drivers/pci/endpoint/Kconfig" source "drivers/pci/switch/Kconfig" -source "drivers/pci/pwrctl/Kconfig" +source "drivers/pci/pwrctrl/Kconfig" endif diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 374c5c06d92f..39a07890abd1 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_PCI) += access.o bus.o probe.o host-bridge.o \ obj-$(CONFIG_PCI) += msi/ obj-$(CONFIG_PCI) += pcie/ -obj-$(CONFIG_PCI) += pwrctl/ +obj-$(CONFIG_PCI) += pwrctrl/ ifdef CONFIG_PCI obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/pci/pwrctl/Kconfig b/drivers/pci/pwrctl/Kconfig deleted file mode 100644 index 54589bb2403b..000000000000 --- a/drivers/pci/pwrctl/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config HAVE_PWRCTL - bool - -config PCI_PWRCTL - tristate - -config PCI_PWRCTL_PWRSEQ - tristate - select POWER_SEQUENCING - select PCI_PWRCTL diff --git a/drivers/pci/pwrctl/Makefile b/drivers/pci/pwrctl/Makefile deleted file mode 100644 index d308aae4800c..000000000000 --- a/drivers/pci/pwrctl/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_PCI_PWRCTL) += pci-pwrctl-core.o -pci-pwrctl-core-y := core.o - -obj-$(CONFIG_PCI_PWRCTL_PWRSEQ) += pci-pwrctl-pwrseq.o diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctl/core.c deleted file mode 100644 index bb5a23712434..000000000000 --- a/drivers/pci/pwrctl/core.c +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2024 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include - -static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, - void *data) -{ - struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb); - struct device *dev = data; - - if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev)) - return NOTIFY_DONE; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - /* - * We will have two struct device objects bound to two different - * drivers on different buses but consuming the same DT node. We - * must not bind the pins twice in this case but only once for - * the first device to be added. - * - * If we got here then the PCI device is the second after the - * power control platform device. Mark its OF node as reused. - */ - dev->of_node_reused = true; - break; - } - - return NOTIFY_DONE; -} - -static void rescan_work_func(struct work_struct *work) -{ - struct pci_pwrctl *pwrctl = container_of(work, struct pci_pwrctl, work); - - pci_lock_rescan_remove(); - pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus); - pci_unlock_rescan_remove(); -} - -/** - * pci_pwrctl_init() - Initialize the PCI power control context struct - * - * @pwrctl: PCI power control data - * @dev: Parent device - */ -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev) -{ - pwrctl->dev = dev; - INIT_WORK(&pwrctl->work, rescan_work_func); -} -EXPORT_SYMBOL_GPL(pci_pwrctl_init); - -/** - * pci_pwrctl_device_set_ready() - Notify the pwrctl subsystem that the PCI - * device is powered-up and ready to be detected. - * - * @pwrctl: PCI power control data. - * - * Returns: - * 0 on success, negative error number on error. - * - * Note: - * This function returning 0 doesn't mean the device was detected. It means, - * that the bus rescan was successfully started. The device will get bound to - * its PCI driver asynchronously. - */ -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl) -{ - int ret; - - if (!pwrctl->dev) - return -ENODEV; - - pwrctl->nb.notifier_call = pci_pwrctl_notify; - ret = bus_register_notifier(&pci_bus_type, &pwrctl->nb); - if (ret) - return ret; - - schedule_work(&pwrctl->work); - - return 0; -} -EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready); - -/** - * pci_pwrctl_device_unset_ready() - Notify the pwrctl subsystem that the PCI - * device is about to be powered-down. - * - * @pwrctl: PCI power control data. - */ -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) -{ - /* - * We don't have to delete the link here. Typically, this function - * is only called when the power control device is being detached. If - * it is being detached then the child PCI device must have already - * been unbound too or the device core wouldn't let us unbind. - */ - bus_unregister_notifier(&pci_bus_type, &pwrctl->nb); -} -EXPORT_SYMBOL_GPL(pci_pwrctl_device_unset_ready); - -static void devm_pci_pwrctl_device_unset_ready(void *data) -{ - struct pci_pwrctl *pwrctl = data; - - pci_pwrctl_device_unset_ready(pwrctl); -} - -/** - * devm_pci_pwrctl_device_set_ready - Managed variant of - * pci_pwrctl_device_set_ready(). - * - * @dev: Device managing this pwrctl provider. - * @pwrctl: PCI power control data. - * - * Returns: - * 0 on success, negative error number on error. - */ -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl) -{ - int ret; - - ret = pci_pwrctl_device_set_ready(pwrctl); - if (ret) - return ret; - - return devm_add_action_or_reset(dev, - devm_pci_pwrctl_device_unset_ready, - pwrctl); -} -EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_set_ready); - -MODULE_AUTHOR("Bartosz Golaszewski "); -MODULE_DESCRIPTION("PCI Device Power Control core driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pwrctl/pci-pwrctl-pwrseq.c b/drivers/pci/pwrctl/pci-pwrctl-pwrseq.c deleted file mode 100644 index a23a4312574b..000000000000 --- a/drivers/pci/pwrctl/pci-pwrctl-pwrseq.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2024 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct pci_pwrctl_pwrseq_data { - struct pci_pwrctl ctx; - struct pwrseq_desc *pwrseq; -}; - -static void devm_pci_pwrctl_pwrseq_power_off(void *data) -{ - struct pwrseq_desc *pwrseq = data; - - pwrseq_power_off(pwrseq); -} - -static int pci_pwrctl_pwrseq_probe(struct platform_device *pdev) -{ - struct pci_pwrctl_pwrseq_data *data; - struct device *dev = &pdev->dev; - int ret; - - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->pwrseq = devm_pwrseq_get(dev, of_device_get_match_data(dev)); - if (IS_ERR(data->pwrseq)) - return dev_err_probe(dev, PTR_ERR(data->pwrseq), - "Failed to get the power sequencer\n"); - - ret = pwrseq_power_on(data->pwrseq); - if (ret) - return dev_err_probe(dev, ret, - "Failed to power-on the device\n"); - - ret = devm_add_action_or_reset(dev, devm_pci_pwrctl_pwrseq_power_off, - data->pwrseq); - if (ret) - return ret; - - pci_pwrctl_init(&data->ctx, dev); - - ret = devm_pci_pwrctl_device_set_ready(dev, &data->ctx); - if (ret) - return dev_err_probe(dev, ret, - "Failed to register the pwrctl wrapper\n"); - - return 0; -} - -static const struct of_device_id pci_pwrctl_pwrseq_of_match[] = { - { - /* ATH11K in QCA6390 package. */ - .compatible = "pci17cb,1101", - .data = "wlan", - }, - { - /* ATH11K in WCN6855 package. */ - .compatible = "pci17cb,1103", - .data = "wlan", - }, - { - /* ATH12K in WCN7850 package. */ - .compatible = "pci17cb,1107", - .data = "wlan", - }, - { } -}; -MODULE_DEVICE_TABLE(of, pci_pwrctl_pwrseq_of_match); - -static struct platform_driver pci_pwrctl_pwrseq_driver = { - .driver = { - .name = "pci-pwrctl-pwrseq", - .of_match_table = pci_pwrctl_pwrseq_of_match, - }, - .probe = pci_pwrctl_pwrseq_probe, -}; -module_platform_driver(pci_pwrctl_pwrseq_driver); - -MODULE_AUTHOR("Bartosz Golaszewski "); -MODULE_DESCRIPTION("Generic PCI Power Control module for power sequenced devices"); -MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pwrctrl/Kconfig b/drivers/pci/pwrctrl/Kconfig new file mode 100644 index 000000000000..54589bb2403b --- /dev/null +++ b/drivers/pci/pwrctrl/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config HAVE_PWRCTL + bool + +config PCI_PWRCTL + tristate + +config PCI_PWRCTL_PWRSEQ + tristate + select POWER_SEQUENCING + select PCI_PWRCTL diff --git a/drivers/pci/pwrctrl/Makefile b/drivers/pci/pwrctrl/Makefile new file mode 100644 index 000000000000..75c7ce531c7e --- /dev/null +++ b/drivers/pci/pwrctrl/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_PCI_PWRCTL) += pci-pwrctrl-core.o +pci-pwrctrl-core-y := core.o + +obj-$(CONFIG_PCI_PWRCTL_PWRSEQ) += pci-pwrctrl-pwrseq.o diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c new file mode 100644 index 000000000000..88f06e9271ba --- /dev/null +++ b/drivers/pci/pwrctrl/core.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb); + struct device *dev = data; + + if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev)) + return NOTIFY_DONE; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + /* + * We will have two struct device objects bound to two different + * drivers on different buses but consuming the same DT node. We + * must not bind the pins twice in this case but only once for + * the first device to be added. + * + * If we got here then the PCI device is the second after the + * power control platform device. Mark its OF node as reused. + */ + dev->of_node_reused = true; + break; + } + + return NOTIFY_DONE; +} + +static void rescan_work_func(struct work_struct *work) +{ + struct pci_pwrctl *pwrctl = container_of(work, struct pci_pwrctl, work); + + pci_lock_rescan_remove(); + pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus); + pci_unlock_rescan_remove(); +} + +/** + * pci_pwrctl_init() - Initialize the PCI power control context struct + * + * @pwrctl: PCI power control data + * @dev: Parent device + */ +void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev) +{ + pwrctl->dev = dev; + INIT_WORK(&pwrctl->work, rescan_work_func); +} +EXPORT_SYMBOL_GPL(pci_pwrctl_init); + +/** + * pci_pwrctl_device_set_ready() - Notify the pwrctl subsystem that the PCI + * device is powered-up and ready to be detected. + * + * @pwrctl: PCI power control data. + * + * Returns: + * 0 on success, negative error number on error. + * + * Note: + * This function returning 0 doesn't mean the device was detected. It means, + * that the bus rescan was successfully started. The device will get bound to + * its PCI driver asynchronously. + */ +int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl) +{ + int ret; + + if (!pwrctl->dev) + return -ENODEV; + + pwrctl->nb.notifier_call = pci_pwrctl_notify; + ret = bus_register_notifier(&pci_bus_type, &pwrctl->nb); + if (ret) + return ret; + + schedule_work(&pwrctl->work); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready); + +/** + * pci_pwrctl_device_unset_ready() - Notify the pwrctl subsystem that the PCI + * device is about to be powered-down. + * + * @pwrctl: PCI power control data. + */ +void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) +{ + /* + * We don't have to delete the link here. Typically, this function + * is only called when the power control device is being detached. If + * it is being detached then the child PCI device must have already + * been unbound too or the device core wouldn't let us unbind. + */ + bus_unregister_notifier(&pci_bus_type, &pwrctl->nb); +} +EXPORT_SYMBOL_GPL(pci_pwrctl_device_unset_ready); + +static void devm_pci_pwrctl_device_unset_ready(void *data) +{ + struct pci_pwrctl *pwrctl = data; + + pci_pwrctl_device_unset_ready(pwrctl); +} + +/** + * devm_pci_pwrctl_device_set_ready - Managed variant of + * pci_pwrctl_device_set_ready(). + * + * @dev: Device managing this pwrctl provider. + * @pwrctl: PCI power control data. + * + * Returns: + * 0 on success, negative error number on error. + */ +int devm_pci_pwrctl_device_set_ready(struct device *dev, + struct pci_pwrctl *pwrctl) +{ + int ret; + + ret = pci_pwrctl_device_set_ready(pwrctl); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, + devm_pci_pwrctl_device_unset_ready, + pwrctl); +} +EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_set_ready); + +MODULE_AUTHOR("Bartosz Golaszewski "); +MODULE_DESCRIPTION("PCI Device Power Control core driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c new file mode 100644 index 000000000000..cc19ad61dd6e --- /dev/null +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct pci_pwrctl_pwrseq_data { + struct pci_pwrctl ctx; + struct pwrseq_desc *pwrseq; +}; + +static void devm_pci_pwrctl_pwrseq_power_off(void *data) +{ + struct pwrseq_desc *pwrseq = data; + + pwrseq_power_off(pwrseq); +} + +static int pci_pwrctl_pwrseq_probe(struct platform_device *pdev) +{ + struct pci_pwrctl_pwrseq_data *data; + struct device *dev = &pdev->dev; + int ret; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->pwrseq = devm_pwrseq_get(dev, of_device_get_match_data(dev)); + if (IS_ERR(data->pwrseq)) + return dev_err_probe(dev, PTR_ERR(data->pwrseq), + "Failed to get the power sequencer\n"); + + ret = pwrseq_power_on(data->pwrseq); + if (ret) + return dev_err_probe(dev, ret, + "Failed to power-on the device\n"); + + ret = devm_add_action_or_reset(dev, devm_pci_pwrctl_pwrseq_power_off, + data->pwrseq); + if (ret) + return ret; + + pci_pwrctl_init(&data->ctx, dev); + + ret = devm_pci_pwrctl_device_set_ready(dev, &data->ctx); + if (ret) + return dev_err_probe(dev, ret, + "Failed to register the pwrctl wrapper\n"); + + return 0; +} + +static const struct of_device_id pci_pwrctl_pwrseq_of_match[] = { + { + /* ATH11K in QCA6390 package. */ + .compatible = "pci17cb,1101", + .data = "wlan", + }, + { + /* ATH11K in WCN6855 package. */ + .compatible = "pci17cb,1103", + .data = "wlan", + }, + { + /* ATH12K in WCN7850 package. */ + .compatible = "pci17cb,1107", + .data = "wlan", + }, + { } +}; +MODULE_DEVICE_TABLE(of, pci_pwrctl_pwrseq_of_match); + +static struct platform_driver pci_pwrctl_pwrseq_driver = { + .driver = { + .name = "pci-pwrctl-pwrseq", + .of_match_table = pci_pwrctl_pwrseq_of_match, + }, + .probe = pci_pwrctl_pwrseq_probe, +}; +module_platform_driver(pci_pwrctl_pwrseq_driver); + +MODULE_AUTHOR("Bartosz Golaszewski "); +MODULE_DESCRIPTION("Generic PCI Power Control module for power sequenced devices"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h deleted file mode 100644 index 0d23dddf59ec..000000000000 --- a/include/linux/pci-pwrctl.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2024 Linaro Ltd. - */ - -#ifndef __PCI_PWRCTL_H__ -#define __PCI_PWRCTL_H__ - -#include -#include - -struct device; -struct device_link; - -/* - * This is a simple framework for solving the issue of PCI devices that require - * certain resources (regulators, GPIOs, clocks) to be enabled before the - * device can actually be detected on the PCI bus. - * - * The idea is to reuse the platform bus to populate OF nodes describing the - * PCI device and its resources, let these platform devices probe and enable - * relevant resources and then trigger a rescan of the PCI bus allowing for the - * same device (with a second associated struct device) to be registered with - * the PCI subsystem. - * - * To preserve a correct hierarchy for PCI power management and device reset, - * we create a device link between the power control platform device (parent) - * and the supplied PCI device (child). - */ - -/** - * struct pci_pwrctl - PCI device power control context. - * @dev: Address of the power controlling device. - * - * An object of this type must be allocated by the PCI power control device and - * passed to the pwrctl subsystem to trigger a bus rescan and setup a device - * link with the device once it's up. - */ -struct pci_pwrctl { - struct device *dev; - - /* Private: don't use. */ - struct notifier_block nb; - struct device_link *link; - struct work_struct work; -}; - -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl); - -#endif /* __PCI_PWRCTL_H__ */ diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h new file mode 100644 index 000000000000..0d23dddf59ec --- /dev/null +++ b/include/linux/pci-pwrctrl.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __PCI_PWRCTL_H__ +#define __PCI_PWRCTL_H__ + +#include +#include + +struct device; +struct device_link; + +/* + * This is a simple framework for solving the issue of PCI devices that require + * certain resources (regulators, GPIOs, clocks) to be enabled before the + * device can actually be detected on the PCI bus. + * + * The idea is to reuse the platform bus to populate OF nodes describing the + * PCI device and its resources, let these platform devices probe and enable + * relevant resources and then trigger a rescan of the PCI bus allowing for the + * same device (with a second associated struct device) to be registered with + * the PCI subsystem. + * + * To preserve a correct hierarchy for PCI power management and device reset, + * we create a device link between the power control platform device (parent) + * and the supplied PCI device (child). + */ + +/** + * struct pci_pwrctl - PCI device power control context. + * @dev: Address of the power controlling device. + * + * An object of this type must be allocated by the PCI power control device and + * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * link with the device once it's up. + */ +struct pci_pwrctl { + struct device *dev; + + /* Private: don't use. */ + struct notifier_block nb; + struct device_link *link; + struct work_struct work; +}; + +void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); +int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); +void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); +int devm_pci_pwrctl_device_set_ready(struct device *dev, + struct pci_pwrctl *pwrctl); + +#endif /* __PCI_PWRCTL_H__ */ -- cgit v1.2.3 From 3f925cd6287401bbc9d568f56d796a69c8bd292a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Nov 2024 15:44:28 -0600 Subject: PCI/pwrctrl: Rename pwrctrl functions and structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename pwrctrl functions and structures from "pwrctl" to "pwrctrl" to match the similar file renames. Link: https://lore.kernel.org/r/20241115214428.2061153-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Acked-by: Krzysztof Wilczyński --- drivers/pci/bus.c | 25 ++++++----- drivers/pci/pwrctrl/core.c | 77 ++++++++++++++++---------------- drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c | 32 ++++++------- drivers/pci/remove.c | 4 +- include/linux/pci-pwrctrl.h | 22 ++++----- 5 files changed, 81 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 90d775601789..0d27809746df 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -322,10 +322,10 @@ void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { } void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } /* - * Create pwrctl devices (if required) for the PCI devices to handle the power + * Create pwrctrl devices (if required) for the PCI devices to handle the power * state. */ -static void pci_pwrctl_create_devices(struct pci_dev *dev) +static void pci_pwrctrl_create_devices(struct pci_dev *dev) { struct device_node *np = dev_of_node(&dev->dev); struct device *parent = &dev->dev; @@ -337,23 +337,24 @@ static void pci_pwrctl_create_devices(struct pci_dev *dev) */ if (np && pci_is_bridge(dev)) { /* - * Now look for the child PCI device nodes and create pwrctl - * devices for them. The pwrctl device drivers will manage the + * Now look for the child PCI device nodes and create pwrctrl + * devices for them. The pwrctrl device drivers will manage the * power state of the devices. */ for_each_available_child_of_node_scoped(np, child) { /* - * First check whether the pwrctl device really needs to - * be created or not. This is decided based on at least - * one of the power supplies being defined in the - * devicetree node of the device. + * First check whether the pwrctrl device really + * needs to be created or not. This is decided + * based on at least one of the power supplies + * being defined in the devicetree node of the + * device. */ if (!of_pci_supply_present(child)) { pci_dbg(dev, "skipping OF node: %s\n", child->name); return; } - /* Now create the pwrctl device */ + /* Now create the pwrctrl device */ pdev = of_platform_device_create(child, NULL, parent); if (!pdev) pci_err(dev, "failed to create OF node: %s\n", child->name); @@ -385,12 +386,12 @@ void pci_bus_add_device(struct pci_dev *dev) pci_proc_attach_device(dev); pci_bridge_d3_update(dev); - pci_pwrctl_create_devices(dev); + pci_pwrctrl_create_devices(dev); /* - * If the PCI device is associated with a pwrctl device with a + * If the PCI device is associated with a pwrctrl device with a * power supply, create a device link between the PCI device and - * pwrctl device. This ensures that pwrctl drivers are probed + * pwrctrl device. This ensures that pwrctrl drivers are probed * before PCI client drivers. */ pdev = of_find_device_by_node(dn); diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 88f06e9271ba..2fb174db91e5 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -11,13 +11,13 @@ #include #include -static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, - void *data) +static int pci_pwrctrl_notify(struct notifier_block *nb, unsigned long action, + void *data) { - struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb); + struct pci_pwrctrl *pwrctrl = container_of(nb, struct pci_pwrctrl, nb); struct device *dev = data; - if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev)) + if (dev_fwnode(dev) != dev_fwnode(pwrctrl->dev)) return NOTIFY_DONE; switch (action) { @@ -40,31 +40,32 @@ static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, static void rescan_work_func(struct work_struct *work) { - struct pci_pwrctl *pwrctl = container_of(work, struct pci_pwrctl, work); + struct pci_pwrctrl *pwrctrl = container_of(work, + struct pci_pwrctrl, work); pci_lock_rescan_remove(); - pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus); + pci_rescan_bus(to_pci_dev(pwrctrl->dev->parent)->bus); pci_unlock_rescan_remove(); } /** - * pci_pwrctl_init() - Initialize the PCI power control context struct + * pci_pwrctrl_init() - Initialize the PCI power control context struct * - * @pwrctl: PCI power control data + * @pwrctrl: PCI power control data * @dev: Parent device */ -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev) +void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev) { - pwrctl->dev = dev; - INIT_WORK(&pwrctl->work, rescan_work_func); + pwrctrl->dev = dev; + INIT_WORK(&pwrctrl->work, rescan_work_func); } -EXPORT_SYMBOL_GPL(pci_pwrctl_init); +EXPORT_SYMBOL_GPL(pci_pwrctrl_init); /** - * pci_pwrctl_device_set_ready() - Notify the pwrctl subsystem that the PCI + * pci_pwrctrl_device_set_ready() - Notify the pwrctrl subsystem that the PCI * device is powered-up and ready to be detected. * - * @pwrctl: PCI power control data. + * @pwrctrl: PCI power control data. * * Returns: * 0 on success, negative error number on error. @@ -74,31 +75,31 @@ EXPORT_SYMBOL_GPL(pci_pwrctl_init); * that the bus rescan was successfully started. The device will get bound to * its PCI driver asynchronously. */ -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl) +int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl) { int ret; - if (!pwrctl->dev) + if (!pwrctrl->dev) return -ENODEV; - pwrctl->nb.notifier_call = pci_pwrctl_notify; - ret = bus_register_notifier(&pci_bus_type, &pwrctl->nb); + pwrctrl->nb.notifier_call = pci_pwrctrl_notify; + ret = bus_register_notifier(&pci_bus_type, &pwrctrl->nb); if (ret) return ret; - schedule_work(&pwrctl->work); + schedule_work(&pwrctrl->work); return 0; } -EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready); +EXPORT_SYMBOL_GPL(pci_pwrctrl_device_set_ready); /** - * pci_pwrctl_device_unset_ready() - Notify the pwrctl subsystem that the PCI + * pci_pwrctrl_device_unset_ready() - Notify the pwrctrl subsystem that the PCI * device is about to be powered-down. * - * @pwrctl: PCI power control data. + * @pwrctrl: PCI power control data. */ -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) +void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl) { /* * We don't have to delete the link here. Typically, this function @@ -106,41 +107,41 @@ void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) * it is being detached then the child PCI device must have already * been unbound too or the device core wouldn't let us unbind. */ - bus_unregister_notifier(&pci_bus_type, &pwrctl->nb); + bus_unregister_notifier(&pci_bus_type, &pwrctrl->nb); } -EXPORT_SYMBOL_GPL(pci_pwrctl_device_unset_ready); +EXPORT_SYMBOL_GPL(pci_pwrctrl_device_unset_ready); -static void devm_pci_pwrctl_device_unset_ready(void *data) +static void devm_pci_pwrctrl_device_unset_ready(void *data) { - struct pci_pwrctl *pwrctl = data; + struct pci_pwrctrl *pwrctrl = data; - pci_pwrctl_device_unset_ready(pwrctl); + pci_pwrctrl_device_unset_ready(pwrctrl); } /** - * devm_pci_pwrctl_device_set_ready - Managed variant of - * pci_pwrctl_device_set_ready(). + * devm_pci_pwrctrl_device_set_ready - Managed variant of + * pci_pwrctrl_device_set_ready(). * - * @dev: Device managing this pwrctl provider. - * @pwrctl: PCI power control data. + * @dev: Device managing this pwrctrl provider. + * @pwrctrl: PCI power control data. * * Returns: * 0 on success, negative error number on error. */ -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl) +int devm_pci_pwrctrl_device_set_ready(struct device *dev, + struct pci_pwrctrl *pwrctrl) { int ret; - ret = pci_pwrctl_device_set_ready(pwrctl); + ret = pci_pwrctrl_device_set_ready(pwrctrl); if (ret) return ret; return devm_add_action_or_reset(dev, - devm_pci_pwrctl_device_unset_ready, - pwrctl); + devm_pci_pwrctrl_device_unset_ready, + pwrctrl); } -EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_set_ready); +EXPORT_SYMBOL_GPL(devm_pci_pwrctrl_device_set_ready); MODULE_AUTHOR("Bartosz Golaszewski "); MODULE_DESCRIPTION("PCI Device Power Control core driver"); diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c index cc19ad61dd6e..e9f89866b7c2 100644 --- a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c @@ -13,21 +13,21 @@ #include #include -struct pci_pwrctl_pwrseq_data { - struct pci_pwrctl ctx; +struct pci_pwrctrl_pwrseq_data { + struct pci_pwrctrl ctx; struct pwrseq_desc *pwrseq; }; -static void devm_pci_pwrctl_pwrseq_power_off(void *data) +static void devm_pci_pwrctrl_pwrseq_power_off(void *data) { struct pwrseq_desc *pwrseq = data; pwrseq_power_off(pwrseq); } -static int pci_pwrctl_pwrseq_probe(struct platform_device *pdev) +static int pci_pwrctrl_pwrseq_probe(struct platform_device *pdev) { - struct pci_pwrctl_pwrseq_data *data; + struct pci_pwrctrl_pwrseq_data *data; struct device *dev = &pdev->dev; int ret; @@ -45,22 +45,22 @@ static int pci_pwrctl_pwrseq_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "Failed to power-on the device\n"); - ret = devm_add_action_or_reset(dev, devm_pci_pwrctl_pwrseq_power_off, + ret = devm_add_action_or_reset(dev, devm_pci_pwrctrl_pwrseq_power_off, data->pwrseq); if (ret) return ret; - pci_pwrctl_init(&data->ctx, dev); + pci_pwrctrl_init(&data->ctx, dev); - ret = devm_pci_pwrctl_device_set_ready(dev, &data->ctx); + ret = devm_pci_pwrctrl_device_set_ready(dev, &data->ctx); if (ret) return dev_err_probe(dev, ret, - "Failed to register the pwrctl wrapper\n"); + "Failed to register the pwrctrl wrapper\n"); return 0; } -static const struct of_device_id pci_pwrctl_pwrseq_of_match[] = { +static const struct of_device_id pci_pwrctrl_pwrseq_of_match[] = { { /* ATH11K in QCA6390 package. */ .compatible = "pci17cb,1101", @@ -78,16 +78,16 @@ static const struct of_device_id pci_pwrctl_pwrseq_of_match[] = { }, { } }; -MODULE_DEVICE_TABLE(of, pci_pwrctl_pwrseq_of_match); +MODULE_DEVICE_TABLE(of, pci_pwrctrl_pwrseq_of_match); -static struct platform_driver pci_pwrctl_pwrseq_driver = { +static struct platform_driver pci_pwrctrl_pwrseq_driver = { .driver = { - .name = "pci-pwrctl-pwrseq", - .of_match_table = pci_pwrctl_pwrseq_of_match, + .name = "pci-pwrctrl-pwrseq", + .of_match_table = pci_pwrctrl_pwrseq_of_match, }, - .probe = pci_pwrctl_pwrseq_probe, + .probe = pci_pwrctrl_pwrseq_probe, }; -module_platform_driver(pci_pwrctl_pwrseq_driver); +module_platform_driver(pci_pwrctrl_pwrseq_driver); MODULE_AUTHOR("Bartosz Golaszewski "); MODULE_DESCRIPTION("Generic PCI Power Control module for power sequenced devices"); diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 3dd9b3024956..27ef371a8443 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -17,7 +17,7 @@ static void pci_free_resources(struct pci_dev *dev) } } -static void pci_pwrctl_unregister(struct device *dev) +static void pci_pwrctrl_unregister(struct device *dev) { struct platform_device *pdev; @@ -34,7 +34,7 @@ static void pci_stop_dev(struct pci_dev *dev) pci_pme_active(dev, false); if (pci_dev_is_added(dev)) { - pci_pwrctl_unregister(&dev->dev); + pci_pwrctrl_unregister(&dev->dev); device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h index 0d23dddf59ec..7d439b0675e9 100644 --- a/include/linux/pci-pwrctrl.h +++ b/include/linux/pci-pwrctrl.h @@ -3,8 +3,8 @@ * Copyright (C) 2024 Linaro Ltd. */ -#ifndef __PCI_PWRCTL_H__ -#define __PCI_PWRCTL_H__ +#ifndef __PCI_PWRCTRL_H__ +#define __PCI_PWRCTRL_H__ #include #include @@ -29,14 +29,14 @@ struct device_link; */ /** - * struct pci_pwrctl - PCI device power control context. + * struct pci_pwrctrl - PCI device power control context. * @dev: Address of the power controlling device. * * An object of this type must be allocated by the PCI power control device and - * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * passed to the pwrctrl subsystem to trigger a bus rescan and setup a device * link with the device once it's up. */ -struct pci_pwrctl { +struct pci_pwrctrl { struct device *dev; /* Private: don't use. */ @@ -45,10 +45,10 @@ struct pci_pwrctl { struct work_struct work; }; -void pci_pwrctl_init(struct pci_pwrctl *pwrctl, struct device *dev); -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl); +void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev); +int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl); +void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl); +int devm_pci_pwrctrl_device_set_ready(struct device *dev, + struct pci_pwrctrl *pwrctrl); -#endif /* __PCI_PWRCTL_H__ */ +#endif /* __PCI_PWRCTRL_H__ */ -- cgit v1.2.3