diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2019-02-16 00:39:13 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2019-02-18 09:46:44 +0100 |
commit | 99b9d7b4970cf131fd17a8f4ad4870049bd7a365 (patch) | |
tree | 4a08db525a7efe09355d1a1038d035115e2c3616 /drivers/misc/habanalabs/goya/goya.c | |
parent | 1ea2a20e91a4d0543a933b4df706c2585db7e3ae (diff) | |
download | lwn-99b9d7b4970cf131fd17a8f4ad4870049bd7a365.tar.gz lwn-99b9d7b4970cf131fd17a8f4ad4870049bd7a365.zip |
habanalabs: add basic Goya support
This patch adds a basic support for the Goya device. The code initializes
the device's PCI controller and PCI bars. It also initializes various S/W
structures and adds some basic helper functions.
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 632 |
1 files changed, 632 insertions, 0 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c new file mode 100644 index 000000000000..ceaffa9afd83 --- /dev/null +++ b/drivers/misc/habanalabs/goya/goya.c @@ -0,0 +1,632 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "goyaP.h" +#include "include/goya/asic_reg/goya_masks.h" + +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/genalloc.h> + +/* + * GOYA security scheme: + * + * 1. Host is protected by: + * - Range registers (When MMU is enabled, DMA RR does NOT protect host) + * - MMU + * + * 2. DRAM is protected by: + * - Range registers (protect the first 512MB) + * - MMU (isolation between users) + * + * 3. Configuration is protected by: + * - Range registers + * - Protection bits + * + * When MMU is disabled: + * + * QMAN DMA: PQ, CQ, CP, DMA are secured. + * PQ, CB and the data are on the host. + * + * QMAN TPC/MME: + * PQ, CQ and CP are not secured. + * PQ, CB and the data are on the SRAM/DRAM. + * + * Since QMAN DMA is secured, KMD is parsing the DMA CB: + * - KMD checks DMA pointer + * - WREG, MSG_PROT are not allowed. + * - MSG_LONG/SHORT are allowed. + * + * A read/write transaction by the QMAN to a protected area will succeed if + * and only if the QMAN's CP is secured and MSG_PROT is used + * + * + * When MMU is enabled: + * + * QMAN DMA: PQ, CQ and CP are secured. + * MMU is set to bypass on the Secure props register of the QMAN. + * The reasons we don't enable MMU for PQ, CQ and CP are: + * - PQ entry is in kernel address space and KMD doesn't map it. + * - CP writes to MSIX register and to kernel address space (completion + * queue). + * + * DMA is not secured but because CP is secured, KMD still needs to parse the + * CB, but doesn't need to check the DMA addresses. + * + * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD + * doesn't map memory in MMU. + * + * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode) + * + * DMA RR does NOT protect host because DMA is not secured + * + */ + +#define GOYA_MMU_REGS_NUM 61 + +#define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ + +#define GOYA_RESET_TIMEOUT_MSEC 500 /* 500ms */ +#define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000 /* 20s */ +#define GOYA_RESET_WAIT_MSEC 1 /* 1ms */ +#define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */ +#define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ +#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */ +#define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ + +#define GOYA_QMAN0_FENCE_VAL 0xD169B243 + +#define GOYA_MAX_INITIATORS 20 + +static void goya_get_fixed_properties(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; + + prop->dram_base_address = DRAM_PHYS_BASE; + prop->dram_size = DRAM_PHYS_DEFAULT_SIZE; + prop->dram_end_address = prop->dram_base_address + prop->dram_size; + prop->dram_user_base_address = DRAM_BASE_ADDR_USER; + + prop->sram_base_address = SRAM_BASE_ADDR; + prop->sram_size = SRAM_SIZE; + prop->sram_end_address = prop->sram_base_address + prop->sram_size; + prop->sram_user_base_address = prop->sram_base_address + + SRAM_USER_BASE_OFFSET; + + prop->host_phys_base_address = HOST_PHYS_BASE; + prop->va_space_host_start_address = VA_HOST_SPACE_START; + prop->va_space_host_end_address = VA_HOST_SPACE_END; + prop->va_space_dram_start_address = VA_DDR_SPACE_START; + prop->va_space_dram_end_address = VA_DDR_SPACE_END; + prop->cfg_size = CFG_SIZE; + prop->max_asid = MAX_ASID; + prop->tpc_enabled_mask = TPC_ENABLED_MASK; + + prop->high_pll = PLL_HIGH_DEFAULT; +} + +/* + * goya_pci_bars_map - Map PCI BARS of Goya device + * + * @hdev: pointer to hl_device structure + * + * Request PCI regions and map them to kernel virtual addresses. + * Returns 0 on success + * + */ +int goya_pci_bars_map(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + rc = pci_request_regions(pdev, HL_NAME); + if (rc) { + dev_err(hdev->dev, "Cannot obtain PCI resources\n"); + return rc; + } + + hdev->pcie_bar[SRAM_CFG_BAR_ID] = + pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID); + if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) { + dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n"); + rc = -ENODEV; + goto err_release_regions; + } + + hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID); + if (!hdev->pcie_bar[MSIX_BAR_ID]) { + dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n"); + rc = -ENODEV; + goto err_unmap_sram_cfg; + } + + hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID); + if (!hdev->pcie_bar[DDR_BAR_ID]) { + dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n"); + rc = -ENODEV; + goto err_unmap_msix; + } + + hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + + (CFG_BASE - SRAM_BASE_ADDR); + + return 0; + +err_unmap_msix: + iounmap(hdev->pcie_bar[MSIX_BAR_ID]); +err_unmap_sram_cfg: + iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]); +err_release_regions: + pci_release_regions(pdev); + + return rc; +} + +/* + * goya_pci_bars_unmap - Unmap PCI BARS of Goya device + * + * @hdev: pointer to hl_device structure + * + * Release all PCI BARS and unmap their virtual addresses + * + */ +static void goya_pci_bars_unmap(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + + iounmap(hdev->pcie_bar[DDR_BAR_ID]); + iounmap(hdev->pcie_bar[MSIX_BAR_ID]); + iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]); + pci_release_regions(pdev); +} + +/* + * goya_elbi_write - Write through the ELBI interface + * + * @hdev: pointer to hl_device structure + * + * return 0 on success, -1 on failure + * + */ +static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u32 val; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, + PCI_CONFIG_ELBI_CTRL_WRITE); + + timeout = ktime_add_ms(ktime_get(), 10); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) + return 0; + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error writing to ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI write didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); + return -EIO; +} + +/* + * goya_iatu_write - iatu write routine + * + * @hdev: pointer to hl_device structure + * + */ +static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data) +{ + u32 dbi_offset; + int rc; + + dbi_offset = addr & 0xFFF; + + rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000); + rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data); + + if (rc) + return -EIO; + + return 0; +} + +void goya_reset_link_through_bridge(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct pci_dev *parent_port; + u16 val; + + parent_port = pdev->bus->self; + pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); + val |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(1); + + val &= ~(PCI_BRIDGE_CTL_BUS_RESET); + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(3); +} + +/* + * goya_set_ddr_bar_base - set DDR bar to map specific device address + * + * @hdev: pointer to hl_device structure + * @addr: address in DDR. Must be aligned to DDR bar size + * + * This function configures the iATU so that the DDR bar will start at the + * specified addr. + * + */ +static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) +{ + struct goya_device *goya = hdev->asic_specific; + int rc; + + if ((goya) && (goya->ddr_bar_cur_addr == addr)) + return 0; + + /* Inbound Region 1 - Bar 4 - Point to DDR */ + rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr)); + rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr)); + rc |= goya_iatu_write(hdev, 0x300, 0); + /* Enable + Bar match + match enable + Bar 4 */ + rc |= goya_iatu_write(hdev, 0x304, 0xC0080400); + + /* Return the DBI window to the default location */ + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0); + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0); + + if (rc) { + dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr); + return -EIO; + } + + if (goya) + goya->ddr_bar_cur_addr = addr; + + return 0; +} + +/* + * goya_init_iatu - Initialize the iATU unit inside the PCI controller + * + * @hdev: pointer to hl_device structure + * + * This is needed in case the firmware doesn't initialize the iATU + * + */ +static int goya_init_iatu(struct hl_device *hdev) +{ + int rc; + + /* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */ + rc = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR)); + rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR)); + rc |= goya_iatu_write(hdev, 0x100, 0); + /* Enable + Bar match + match enable */ + rc |= goya_iatu_write(hdev, 0x104, 0xC0080000); + + /* Inbound Region 1 - Bar 4 - Point to DDR */ + rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE); + + /* Outbound Region 0 - Point to Host */ + rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE)); + rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE)); + rc |= goya_iatu_write(hdev, 0x010, + lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1)); + rc |= goya_iatu_write(hdev, 0x014, 0); + rc |= goya_iatu_write(hdev, 0x018, 0); + rc |= goya_iatu_write(hdev, 0x020, + upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1)); + /* Increase region size */ + rc |= goya_iatu_write(hdev, 0x000, 0x00002000); + /* Enable */ + rc |= goya_iatu_write(hdev, 0x004, 0x80000000); + + /* Return the DBI window to the default location */ + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0); + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0); + + if (rc) + return -EIO; + + return 0; +} + +/* + * goya_early_init - GOYA early initialization code + * + * @hdev: pointer to hl_device structure + * + * Verify PCI bars + * Set DMA masks + * PCI controller initialization + * Map PCI bars + * + */ +static int goya_early_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pci_dev *pdev = hdev->pdev; + u32 val; + int rc; + + goya_get_fixed_properties(hdev); + + /* Check BAR sizes */ + if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", + SRAM_CFG_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + SRAM_CFG_BAR_ID), + CFG_BAR_SIZE); + return -ENODEV; + } + + if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", + MSIX_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + MSIX_BAR_ID), + MSIX_BAR_SIZE); + return -ENODEV; + } + + prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); + + /* set DMA mask for GOYA */ + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (rc) { + dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n"); + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(hdev->dev, + "Unable to set pci dma mask to 32 bits\n"); + return rc; + } + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (rc) { + dev_warn(hdev->dev, + "Unable to set pci consistent dma mask to 39 bits\n"); + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(hdev->dev, + "Unable to set pci consistent dma mask to 32 bits\n"); + return rc; + } + } + + if (hdev->reset_pcilink) + goya_reset_link_through_bridge(hdev); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(hdev->dev, "can't enable PCI device\n"); + return rc; + } + + pci_set_master(pdev); + + rc = goya_init_iatu(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize iATU\n"); + goto disable_device; + } + + rc = goya_pci_bars_map(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize PCI BARS\n"); + goto disable_device; + } + + val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); + if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) + dev_warn(hdev->dev, + "PCI strap is not configured correctly, PCI bus errors may occur\n"); + + return 0; + +disable_device: + pci_clear_master(pdev); + pci_disable_device(pdev); + + return rc; +} + +/* + * goya_early_fini - GOYA early finalization code + * + * @hdev: pointer to hl_device structure + * + * Unmap PCI bars + * + */ +int goya_early_fini(struct hl_device *hdev) +{ + goya_pci_bars_unmap(hdev); + + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); + + return 0; +} + +/* + * goya_sw_init - Goya software initialization code + * + * @hdev: pointer to hl_device structure + * + */ +static int goya_sw_init(struct hl_device *hdev) +{ + struct goya_device *goya; + int rc; + + /* Allocate device structure */ + goya = kzalloc(sizeof(*goya), GFP_KERNEL); + if (!goya) + return -ENOMEM; + + /* according to goya_init_iatu */ + goya->ddr_bar_cur_addr = DRAM_PHYS_BASE; + hdev->asic_specific = goya; + + /* Create DMA pool for small allocations */ + hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), + &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); + if (!hdev->dma_pool) { + dev_err(hdev->dev, "failed to create DMA pool\n"); + rc = -ENOMEM; + goto free_goya_device; + } + + hdev->cpu_accessible_dma_mem = + hdev->asic_funcs->dma_alloc_coherent(hdev, + CPU_ACCESSIBLE_MEM_SIZE, + &hdev->cpu_accessible_dma_address, + GFP_KERNEL | __GFP_ZERO); + + if (!hdev->cpu_accessible_dma_mem) { + dev_err(hdev->dev, + "failed to allocate %d of dma memory for CPU accessible memory space\n", + CPU_ACCESSIBLE_MEM_SIZE); + rc = -ENOMEM; + goto free_dma_pool; + } + + hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1); + if (!hdev->cpu_accessible_dma_pool) { + dev_err(hdev->dev, + "Failed to create CPU accessible DMA pool\n"); + rc = -ENOMEM; + goto free_cpu_pq_dma_mem; + } + + rc = gen_pool_add(hdev->cpu_accessible_dma_pool, + (uintptr_t) hdev->cpu_accessible_dma_mem, + CPU_ACCESSIBLE_MEM_SIZE, -1); + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to CPU accessible DMA pool\n"); + rc = -EFAULT; + goto free_cpu_pq_pool; + } + + spin_lock_init(&goya->hw_queues_lock); + + return 0; + +free_cpu_pq_pool: + gen_pool_destroy(hdev->cpu_accessible_dma_pool); +free_cpu_pq_dma_mem: + hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->cpu_accessible_dma_mem, + hdev->cpu_accessible_dma_address); +free_dma_pool: + dma_pool_destroy(hdev->dma_pool); +free_goya_device: + kfree(goya); + + return rc; +} + +/* + * goya_sw_fini - Goya software tear-down code + * + * @hdev: pointer to hl_device structure + * + */ +int goya_sw_fini(struct hl_device *hdev) +{ + struct goya_device *goya = hdev->asic_specific; + + gen_pool_destroy(hdev->cpu_accessible_dma_pool); + + hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->cpu_accessible_dma_mem, + hdev->cpu_accessible_dma_address); + + dma_pool_destroy(hdev->dma_pool); + + kfree(goya); + + return 0; +} + +int goya_suspend(struct hl_device *hdev) +{ + return 0; +} + +int goya_resume(struct hl_device *hdev) +{ + return 0; +} + +void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); +} + +void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, + dma_addr_t dma_handle) +{ + dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); +} + +static const struct hl_asic_funcs goya_funcs = { + .early_init = goya_early_init, + .early_fini = goya_early_fini, + .sw_init = goya_sw_init, + .sw_fini = goya_sw_fini, + .suspend = goya_suspend, + .resume = goya_resume, + .dma_alloc_coherent = goya_dma_alloc_coherent, + .dma_free_coherent = goya_dma_free_coherent, +}; + +/* + * goya_set_asic_funcs - set Goya function pointers + * + * @*hdev: pointer to hl_device structure + * + */ +void goya_set_asic_funcs(struct hl_device *hdev) +{ + hdev->asic_funcs = &goya_funcs; +} |