Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 99b9d7b4 authored by Oded Gabbay's avatar Oded Gabbay Committed by Greg Kroah-Hartman
Browse files

habanalabs: add basic Goya support



This patch adds a basic support for the Goya device. The code initializes
the device's PCI controller and PCI bars. It also initializes various S/W
structures and adds some basic helper functions.

Reviewed-by: default avatarMike Rapoport <rppt@linux.ibm.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 1ea2a20e
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -5,3 +5,6 @@
obj-m	:= habanalabs.o

habanalabs-y := habanalabs_drv.o device.o

include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
+71 −0
Original line number Diff line number Diff line
@@ -120,8 +120,11 @@ static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
 */
static int device_early_init(struct hl_device *hdev)
{
	int rc;

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	default:
@@ -130,6 +133,10 @@ static int device_early_init(struct hl_device *hdev)
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	return 0;
}

@@ -141,6 +148,10 @@ static int device_early_init(struct hl_device *hdev)
 */
static void device_early_fini(struct hl_device *hdev)
{

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

}

/*
@@ -154,8 +165,15 @@ static void device_early_fini(struct hl_device *hdev)
 */
int hl_device_suspend(struct hl_device *hdev)
{
	int rc;

	pci_save_state(hdev->pdev);

	rc = hdev->asic_funcs->suspend(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to disable PCI access of device CPU\n");

	/* Shut down the device */
	pci_disable_device(hdev->pdev);
	pci_set_power_state(hdev->pdev, PCI_D3hot);
@@ -185,6 +203,13 @@ int hl_device_resume(struct hl_device *hdev)
		return rc;
	}

	rc = hdev->asic_funcs->resume(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from device CPU\n");
		return rc;
	}

	return 0;
}

@@ -212,11 +237,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	if (rc)
		goto release_device;

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto early_fini;

	dev_notice(hdev->dev,
		"Successfully added device to habanalabs driver\n");

	return 0;

early_fini:
	device_early_fini(hdev);
release_device:
	device_destroy(hclass, hdev->dev->devt);
	cdev_del(&hdev->cdev);
@@ -247,6 +282,9 @@ void hl_device_fini(struct hl_device *hdev)
	/* Mark device as disabled */
	hdev->disabled = true;

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide device from user */
@@ -338,3 +376,36 @@ int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,

	return *val ? 0 : -ETIMEDOUT;
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}
+3 −0
Original line number Diff line number Diff line
subdir-ccflags-y += -I$(src)

HL_GOYA_FILES :=  goya/goya.o
+632 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "include/goya/asic_reg/goya_masks.h"

#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/genalloc.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, KMD is parsing the DMA CB:
 *     - KMD checks DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and KMD doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured but because CP is secured, KMD still needs to parse the
 * CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
 * doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
 *
 * DMA RR does NOT protect host because DMA is not secured
 *
 */

#define GOYA_MMU_REGS_NUM		61

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_CPU_TIMEOUT_USEC		10000000	/* 10s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_INITIATORS		20

static void goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->host_phys_base_address = HOST_PHYS_BASE;
	prop->va_space_host_start_address = VA_HOST_SPACE_START;
	prop->va_space_host_end_address = VA_HOST_SPACE_END;
	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->high_pll = PLL_HIGH_DEFAULT;
}

/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
int goya_pci_bars_map(struct hl_device *hdev)
{
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = pci_request_regions(pdev, HL_NAME);
	if (rc) {
		dev_err(hdev->dev, "Cannot obtain PCI resources\n");
		return rc;
	}

	hdev->pcie_bar[SRAM_CFG_BAR_ID] =
			pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID);
	if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) {
		dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n");
		rc = -ENODEV;
		goto err_release_regions;
	}

	hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID);
	if (!hdev->pcie_bar[MSIX_BAR_ID]) {
		dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n");
		rc = -ENODEV;
		goto err_unmap_sram_cfg;
	}

	hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID);
	if (!hdev->pcie_bar[DDR_BAR_ID]) {
		dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n");
		rc = -ENODEV;
		goto err_unmap_msix;
	}

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(CFG_BASE - SRAM_BASE_ADDR);

	return 0;

err_unmap_msix:
	iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
err_unmap_sram_cfg:
	iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
err_release_regions:
	pci_release_regions(pdev);

	return rc;
}

/*
 * goya_pci_bars_unmap - Unmap PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Release all PCI BARS and unmap their virtual addresses
 *
 */
static void goya_pci_bars_unmap(struct hl_device *hdev)
{
	struct pci_dev *pdev = hdev->pdev;

	iounmap(hdev->pcie_bar[DDR_BAR_ID]);
	iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
	iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
	pci_release_regions(pdev);
}

/*
 * goya_elbi_write - Write through the ELBI interface
 *
 * @hdev: pointer to hl_device structure
 *
 * return 0 on success, -1 on failure
 *
 */
static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
{
	struct pci_dev *pdev = hdev->pdev;
	ktime_t timeout;
	u32 val;

	/* Clear previous status */
	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);

	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
				PCI_CONFIG_ELBI_CTRL_WRITE);

	timeout = ktime_add_ms(ktime_get(), 10);
	for (;;) {
		pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
		if (val & PCI_CONFIG_ELBI_STS_MASK)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0) {
			pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
						&val);
			break;
		}
		usleep_range(300, 500);
	}

	if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
		return 0;

	if (val & PCI_CONFIG_ELBI_STS_ERR) {
		dev_err(hdev->dev, "Error writing to ELBI\n");
		return -EIO;
	}

	if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
		dev_err(hdev->dev, "ELBI write didn't finish in time\n");
		return -EIO;
	}

	dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
	return -EIO;
}

/*
 * goya_iatu_write - iatu write routine
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
{
	u32 dbi_offset;
	int rc;

	dbi_offset = addr & 0xFFF;

	rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000);
	rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data);

	if (rc)
		return -EIO;

	return 0;
}

void goya_reset_link_through_bridge(struct hl_device *hdev)
{
	struct pci_dev *pdev = hdev->pdev;
	struct pci_dev *parent_port;
	u16 val;

	parent_port = pdev->bus->self;
	pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
	val |= PCI_BRIDGE_CTL_BUS_RESET;
	pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
	ssleep(1);

	val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
	pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
	ssleep(3);
}

/*
 * goya_set_ddr_bar_base - set DDR bar to map specific device address
 *
 * @hdev: pointer to hl_device structure
 * @addr: address in DDR. Must be aligned to DDR bar size
 *
 * This function configures the iATU so that the DDR bar will start at the
 * specified addr.
 *
 */
static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return 0;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr));
	rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr));
	rc |= goya_iatu_write(hdev, 0x300, 0);
	/* Enable + Bar match + match enable + Bar 4 */
	rc |= goya_iatu_write(hdev, 0x304, 0xC0080400);

	/* Return the DBI window to the default location */
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);

	if (rc) {
		dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr);
		return -EIO;
	}

	if (goya)
		goya->ddr_bar_cur_addr = addr;

	return 0;
}

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */
	rc  = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR));
	rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR));
	rc |= goya_iatu_write(hdev, 0x100, 0);
	/* Enable + Bar match + match enable */
	rc |= goya_iatu_write(hdev, 0x104, 0xC0080000);

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);

	/* Outbound Region 0 - Point to Host */
	rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE));
	rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE));
	rc |= goya_iatu_write(hdev, 0x010,
		lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
	rc |= goya_iatu_write(hdev, 0x014, 0);
	rc |= goya_iatu_write(hdev, 0x018, 0);
	rc |= goya_iatu_write(hdev, 0x020,
		upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
	/* Increase region size */
	rc |= goya_iatu_write(hdev, 0x000, 0x00002000);
	/* Enable */
	rc |= goya_iatu_write(hdev, 0x004, 0x80000000);

	/* Return the DBI window to the default location */
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);

	if (rc)
		return -EIO;

	return 0;
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	goya_get_fixed_properties(hdev);

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		return -ENODEV;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		return -ENODEV;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	/* set DMA mask for GOYA */
	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
	if (rc) {
		dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n");
		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (rc) {
			dev_err(hdev->dev,
				"Unable to set pci dma mask to 32 bits\n");
			return rc;
		}
	}

	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
	if (rc) {
		dev_warn(hdev->dev,
			"Unable to set pci consistent dma mask to 39 bits\n");
		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (rc) {
			dev_err(hdev->dev,
				"Unable to set pci consistent dma mask to 32 bits\n");
			return rc;
		}
	}

	if (hdev->reset_pcilink)
		goya_reset_link_through_bridge(hdev);

	rc = pci_enable_device_mem(pdev);
	if (rc) {
		dev_err(hdev->dev, "can't enable PCI device\n");
		return rc;
	}

	pci_set_master(pdev);

	rc = goya_init_iatu(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize iATU\n");
		goto disable_device;
	}

	rc = goya_pci_bars_map(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize PCI BARS\n");
		goto disable_device;
	}

	val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
	if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
		dev_warn(hdev->dev,
			"PCI strap is not configured correctly, PCI bus errors may occur\n");

	return 0;

disable_device:
	pci_clear_master(pdev);
	pci_disable_device(pdev);

	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
int goya_early_fini(struct hl_device *hdev)
{
	goya_pci_bars_unmap(hdev);

	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return 0;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->dma_alloc_coherent(hdev,
					CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		dev_err(hdev->dev,
			"failed to allocate %d of dma memory for CPU accessible memory space\n",
			CPU_ACCESSIBLE_MEM_SIZE);
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_pq_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_pq_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);

	return 0;

free_cpu_pq_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_pq_dma_mem:
	hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}

int goya_suspend(struct hl_device *hdev)
{
	return 0;
}

int goya_resume(struct hl_device *hdev)
{
	return 0;
}

void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
}

void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
				dma_addr_t dma_handle)
{
	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.dma_alloc_coherent = goya_dma_alloc_coherent,
	.dma_free_coherent = goya_dma_free_coherent,
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @*hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}
+152 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef GOYAP_H_
#define GOYAP_H_

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include "include/goya/goya.h"

#define NUMBER_OF_CMPLT_QUEUES		5
#define NUMBER_OF_EXT_HW_QUEUES		5
#define NUMBER_OF_CPU_HW_QUEUES		1
#define NUMBER_OF_INT_HW_QUEUES		9
#define NUMBER_OF_HW_QUEUES		(NUMBER_OF_EXT_HW_QUEUES + \
					NUMBER_OF_CPU_HW_QUEUES + \
					NUMBER_OF_INT_HW_QUEUES)

/*
 * Number of MSIX interrupts IDS:
 * Each completion queue has 1 ID
 * The event queue has 1 ID
 */
#define NUMBER_OF_INTERRUPTS		(NUMBER_OF_CMPLT_QUEUES + 1)

#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
#endif

#if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
#endif

#define QMAN_FENCE_TIMEOUT_USEC		10000	/* 10 ms */

#define QMAN_STOP_TIMEOUT_USEC		100000	/* 100 ms */

#define TPC_ENABLED_MASK		0xFF

#define PLL_HIGH_DEFAULT		1575000000	/* 1.575 GHz */

#define GOYA_ARMCP_INFO_TIMEOUT		10000000	/* 10s */

#define DRAM_PHYS_DEFAULT_SIZE		0x100000000ull	/* 4GB */

/* DRAM Memory Map */

#define CPU_FW_IMAGE_SIZE	0x10000000	/* 256MB */
#define MMU_PAGE_TABLES_SIZE	0x0E000000	/* 224MB */
#define MMU_CACHE_MNG_SIZE	0x00001000	/* 4KB */
#define CPU_PQ_PKT_SIZE		0x00001000	/* 4KB */
#define CPU_PQ_DATA_SIZE	0x01FFE000	/* 32MB - 8KB  */

#define CPU_FW_IMAGE_ADDR	DRAM_PHYS_BASE
#define MMU_PAGE_TABLES_ADDR	(CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
#define MMU_CACHE_MNG_ADDR	(MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)
#define CPU_PQ_PKT_ADDR		(MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE)
#define CPU_PQ_DATA_ADDR	(CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
#define DRAM_BASE_ADDR_USER	(CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)

#if (DRAM_BASE_ADDR_USER != 0x20000000)
#error "KMD must reserve 512MB"
#endif

/*
 * SRAM Memory Map for KMD
 *
 * KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
 * MME/TPC QMANs
 *
 */

#define MME_QMAN_BASE_OFFSET	0x000000	/* Must be 0 */
#define MME_QMAN_LENGTH		64
#define TPC_QMAN_LENGTH		64

#define TPC0_QMAN_BASE_OFFSET	(MME_QMAN_BASE_OFFSET + \
				(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC1_QMAN_BASE_OFFSET	(TPC0_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC2_QMAN_BASE_OFFSET	(TPC1_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC3_QMAN_BASE_OFFSET	(TPC2_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC4_QMAN_BASE_OFFSET	(TPC3_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC5_QMAN_BASE_OFFSET	(TPC4_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC6_QMAN_BASE_OFFSET	(TPC5_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC7_QMAN_BASE_OFFSET	(TPC6_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))

#define SRAM_KMD_RES_OFFSET	(TPC7_QMAN_BASE_OFFSET + \
				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))

#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
#error "MME/TPC QMANs SRAM space exceeds limit"
#endif

#define SRAM_USER_BASE_OFFSET	GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START

/* Virtual address space */
#define VA_HOST_SPACE_START	0x1000000000000ull	/* 256TB */
#define VA_HOST_SPACE_END	0x3FF8000000000ull	/* 1PB - 1TB */
#define VA_HOST_SPACE_SIZE	(VA_HOST_SPACE_END - \
					VA_HOST_SPACE_START) /* 767TB */

#define VA_DDR_SPACE_START	0x800000000ull		/* 32GB */
#define VA_DDR_SPACE_END	0x2000000000ull		/* 128GB */
#define VA_DDR_SPACE_SIZE	(VA_DDR_SPACE_END - \
					VA_DDR_SPACE_START)	/* 128GB */

#define DMA_MAX_TRANSFER_SIZE	0xFFFFFFFF

#define HW_CAP_PLL		0x00000001
#define HW_CAP_DDR_0		0x00000002
#define HW_CAP_DDR_1		0x00000004
#define HW_CAP_MME		0x00000008
#define HW_CAP_CPU		0x00000010
#define HW_CAP_DMA		0x00000020
#define HW_CAP_MSIX		0x00000040
#define HW_CAP_CPU_Q		0x00000080
#define HW_CAP_MMU		0x00000100
#define HW_CAP_TPC_MBIST	0x00000200
#define HW_CAP_GOLDEN		0x00000400
#define HW_CAP_TPC		0x00000800

#define CPU_PKT_SHIFT		5
#define CPU_PKT_SIZE		(1 << CPU_PKT_SHIFT)
#define CPU_PKT_MASK		(~((1 << CPU_PKT_SHIFT) - 1))
#define CPU_MAX_PKTS_IN_CB	32
#define CPU_CB_SIZE		(CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB)
#define CPU_ACCESSIBLE_MEM_SIZE	(HL_QUEUE_LENGTH * CPU_CB_SIZE)

enum goya_fw_component {
	FW_COMP_UBOOT,
	FW_COMP_PREBOOT
};

struct goya_device {
	/* TODO: remove hw_queues_lock after moving to scheduler code */
	spinlock_t	hw_queues_lock;
	u64		ddr_bar_cur_addr;
	u32		hw_cap_initialized;
};

#endif /* GOYAP_H_ */
Loading