
Commit fb4e3bee authored by Linus Torvalds
Pull IOMMU updates from Joerg Roedel:
 "This update comes with:

   - Support for lockless operation in the ARM io-pgtable code.

     This is an important step to solve the scalability problems in the
     common dma-iommu code for ARM

   - Some Errata workarounds for ARM SMMU implementations

   - Rewrite of the deferred IO/TLB flush code in the AMD IOMMU driver.

     The code suffered from very high flush rates; with the new
     implementation the flush rate is down to ~1% of what it was before

   - Support for amd_iommu=off when booting with kexec.

     The problem here was that the IOMMU driver bailed out early without
     disabling the iommu hardware, if it was enabled in the old kernel

   - The Rockchip IOMMU driver is now available on ARM64

   - Align the return value of the iommu_ops->device_group call-backs to
     not miss error values

   - Preempt-disable optimizations in the Intel VT-d and common IOVA
     code to help Linux-RT

   - Various other small cleanups and fixes"
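
The deferred-flush rewrite mentioned above replaces a single global flush queue with per-domain, per-CPU ring buffers gated by two flush counters (see the amd_iommu.c changes below). The following user-space C sketch models just one ring to show why the flush rate drops so sharply; the names, the tiny queue size and the flush_tlb() stub are illustrative only, and the real driver's per-CPU queues, locking and 10ms flush timer are left out.

/* Hedged sketch: single-queue model of the deferred IOTLB flush idea. */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define QUEUE_SIZE 8	/* illustrative; the driver uses 256 entries per CPU */

struct flush_entry {
	unsigned long iova_pfn;
	unsigned long pages;
	uint64_t counter;	/* flush_start_cnt at the time of queueing */
};

static struct flush_entry ring[QUEUE_SIZE];
static unsigned head, tail;
static uint64_t flush_start_cnt, flush_finish_cnt;
static unsigned long flushes;

static bool ring_full(void)
{
	return (tail + 1) % QUEUE_SIZE == head;
}

static void flush_tlb(void)	/* stands in for an expensive IOTLB flush */
{
	flush_start_cnt++;
	/* ... the hardware flush would happen here ... */
	flush_finish_cnt++;
	flushes++;
}

static void free_flushed(void)
{
	/* Only entries queued before the last completed flush may be reused */
	while (head != tail && ring[head].counter < flush_finish_cnt)
		head = (head + 1) % QUEUE_SIZE;
}

static void queue_add(unsigned long iova_pfn, unsigned long pages)
{
	free_flushed();
	if (ring_full()) {	/* flush only when the ring fills (or on a timer) */
		flush_tlb();
		free_flushed();
	}
	ring[tail] = (struct flush_entry){ iova_pfn, pages, flush_start_cnt };
	tail = (tail + 1) % QUEUE_SIZE;
}

int main(void)
{
	for (unsigned long i = 0; i < 1000; i++)
		queue_add(i, 1);	/* 1000 unmaps ... */
	printf("flushes for 1000 unmaps: %lu\n", flushes);	/* ... far fewer flushes */
	return 0;
}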

* tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits)
  iommu/vt-d: Constify intel_dma_ops
  iommu: Warn once when device_group callback returns NULL
  iommu/omap: Return ERR_PTR in device_group call-back
  iommu: Return ERR_PTR() values from device_group call-backs
  iommu/s390: Use iommu_group_get_for_dev() in s390_iommu_add_device()
  iommu/vt-d: Don't disable preemption while accessing deferred_flush()
  iommu/iova: Don't disable preempt around this_cpu_ptr()
  iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126
  iommu/arm-smmu-v3: Enable ACPI based HiSilicon CMD_PREFETCH quirk(erratum 161010701)
  iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74
  ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3 model
  iommu/arm-smmu-v3, acpi: Add temporary Cavium SMMU-V3 IORT model number definitions
  iommu/io-pgtable-arm: Use dma_wmb() instead of wmb() when publishing table
  iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST with LPAE
  iommu/arm-smmu-v3: Remove io-pgtable spinlock
  iommu/arm-smmu: Remove io-pgtable spinlock
  iommu/io-pgtable-arm-v7s: Support lockless operation
  iommu/io-pgtable-arm: Support lockless operation
  iommu/io-pgtable: Introduce explicit coherency
  iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap
  ...
parents 6b1c776d 6a708643
+4 −1
@@ -61,12 +61,15 @@ stable kernels.
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
-| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
 | Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
+| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
 |                |                 |                 |                             |
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 |                |                 |                 |                             |
 | Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
+| Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
+12 −0
@@ -26,6 +26,12 @@ the PCIe specification.
                       * "priq"      - PRI Queue not empty
                       * "cmdq-sync" - CMD_SYNC complete
                       * "gerror"    - Global Error activated
+                      * "combined"  - The combined interrupt is optional,
+				      and should only be provided if the
+				      hardware supports just a single,
+				      combined interrupt line.
+				      If provided, then the combined interrupt
+				      will be used in preference to any others.
 
 - #iommu-cells      : See the generic IOMMU binding described in
                         devicetree/bindings/pci/pci-iommu.txt
@@ -49,6 +55,12 @@ the PCIe specification.
 - hisilicon,broken-prefetch-cmd
                     : Avoid sending CMD_PREFETCH_* commands to the SMMU.
 
+- cavium,cn9900-broken-page1-regspace
+                    : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
+		      PRIQ_PROD/CONS register access with page 0 offsets.
+		      Set for Cavium ThunderX2 silicon that doesn't support
+		      SMMU page1 register space.
+
 ** Example
 
         smmu@2b400000 {
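
For context, a driver consuming the new cavium,cn9900-broken-page1-regspace property only needs a boolean devicetree lookup plus an offset fixup when touching registers that architecturally live in SMMU page 1. The sketch below illustrates that pattern under stated assumptions: the structure, flag and helper names are invented for this example and are not the actual arm-smmu-v3.c implementation.

#include <linux/io.h>
#include <linux/of.h>
#include <linux/sizes.h>
#include <linux/types.h>

#define EXAMPLE_OPT_PAGE0_REGS_ONLY	(1 << 0)	/* illustrative flag name */

struct example_smmu {
	void __iomem	*base;
	u32		options;
};

static void example_parse_dt(struct example_smmu *smmu, struct device_node *np)
{
	/* ThunderX2 (CN99xx) silicon has no usable SMMU page 1 */
	if (of_property_read_bool(np, "cavium,cn9900-broken-page1-regspace"))
		smmu->options |= EXAMPLE_OPT_PAGE0_REGS_ONLY;
}

/*
 * Registers that normally sit in page 1 (EVTQ_PROD/CONS, PRIQ_PROD/CONS)
 * are folded back into page 0 when the quirk is set.
 */
static void __iomem *example_page1_reg(struct example_smmu *smmu,
				       unsigned long page1_offset)
{
	if (smmu->options & EXAMPLE_OPT_PAGE0_REGS_ONLY)
		return smmu->base + page1_offset - SZ_64K;

	return smmu->base + page1_offset;
}
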
+63 −20
@@ -31,6 +31,11 @@
 #define IORT_IOMMU_TYPE		((1 << ACPI_IORT_NODE_SMMU) |	\
 				(1 << ACPI_IORT_NODE_SMMU_V3))
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
+#endif
+
 struct iort_its_msi_chip {
 	struct list_head	list;
 	struct fwnode_handle	*fw_node;
@@ -819,6 +824,36 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
 	return num_res;
 }
 
+static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu)
+{
+	/*
+	 * Cavium ThunderX2 implementation doesn't not support unique
+	 * irq line. Use single irq line for all the SMMUv3 interrupts.
+	 */
+	if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+		return false;
+
+	/*
+	 * ThunderX2 doesn't support MSIs from the SMMU, so we're checking
+	 * SPI numbers here.
+	 */
+	return smmu->event_gsiv == smmu->pri_gsiv &&
+	       smmu->event_gsiv == smmu->gerr_gsiv &&
+	       smmu->event_gsiv == smmu->sync_gsiv;
+}
+
+static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu)
+{
+	/*
+	 * Override the size, for Cavium ThunderX2 implementation
+	 * which doesn't support the page 1 SMMU register space.
+	 */
+	if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+		return SZ_64K;
+
+	return SZ_128K;
+}
+
 static void __init arm_smmu_v3_init_resources(struct resource *res,
 					      struct acpi_iort_node *node)
 {
@@ -829,10 +864,17 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
 	smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
 	res[num_res].start = smmu->base_address;
-	res[num_res].end = smmu->base_address + SZ_128K - 1;
+	res[num_res].end = smmu->base_address +
+				arm_smmu_v3_resource_size(smmu) - 1;
 	res[num_res].flags = IORESOURCE_MEM;
 
 	num_res++;
+	if (arm_smmu_v3_is_combined_irq(smmu)) {
+		if (smmu->event_gsiv)
+			acpi_iort_register_irq(smmu->event_gsiv, "combined",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+	} else {
 
 		if (smmu->event_gsiv)
 			acpi_iort_register_irq(smmu->event_gsiv, "eventq",
@@ -854,6 +896,7 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
 					       ACPI_EDGE_SENSITIVE,
 					       &res[num_res++]);
 	}
+}
 
 static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
 {
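
Condensed into a standalone illustration, the two helpers added above amount to the following decision. This is a hedged user-space mock: mock_smmu_v3 and CAVIUM_CN99XX merely stand in for the ACPICA acpi_iort_smmu_v3 node and its model field, and the program is not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define CAVIUM_CN99XX	0x2		/* mirrors ACPI_IORT_SMMU_V3_CAVIUM_CN99XX */
#define SZ_64K		0x10000UL
#define SZ_128K		0x20000UL

struct mock_smmu_v3 {
	unsigned int model;
	unsigned int event_gsiv, pri_gsiv, gerr_gsiv, sync_gsiv;
};

static bool is_combined_irq(const struct mock_smmu_v3 *s)
{
	/* Only the ThunderX2 model, and only if all SPIs are identical */
	if (s->model != CAVIUM_CN99XX)
		return false;
	return s->event_gsiv == s->pri_gsiv &&
	       s->event_gsiv == s->gerr_gsiv &&
	       s->event_gsiv == s->sync_gsiv;
}

static unsigned long mmio_size(const struct mock_smmu_v3 *s)
{
	/* ThunderX2 has no page 1, so only 64K of register space is mapped */
	return s->model == CAVIUM_CN99XX ? SZ_64K : SZ_128K;
}

int main(void)
{
	/* All four interrupts wired to the same SPI, as on ThunderX2 */
	struct mock_smmu_v3 tx2 = { CAVIUM_CN99XX, 100, 100, 100, 100 };

	printf("combined irq: %d, mmio size: 0x%lx\n",
	       is_combined_irq(&tx2), mmio_size(&tx2));
	return 0;
}
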
+3 −3
@@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE
 config IOMMU_IO_PGTABLE_LPAE
 	bool "ARMv7/v8 Long Descriptor Format"
 	select IOMMU_IO_PGTABLE
-	depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
+	depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64))
 	help
 	  Enable support for the ARM long descriptor pagetable format.
 	  This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG
 
 config ROCKCHIP_IOMMU
 	bool "Rockchip IOMMU Support"
-	depends on ARM
+	depends on ARM || ARM64
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	select IOMMU_API
 	select ARM_DMA_USE_IOMMU
@@ -274,7 +274,7 @@ config EXYNOS_IOMMU_DEBUG
 
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
-	depends on ARM_LPAE
+	depends on ARM || IOMMU_DMA
 	depends on ARCH_RENESAS || COMPILE_TEST
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
+292 −166
@@ -91,25 +91,6 @@ LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	struct dma_ops_domain *dma_dom;
-};
-
-struct flush_queue {
-	spinlock_t lock;
-	unsigned next;
-	struct flush_queue_entry *entries;
-};
-
-static DEFINE_PER_CPU(struct flush_queue, flush_queue);
-
-static atomic_t queue_timer_on;
-static struct timer_list queue_timer;
-
 /*
  * Domain for untranslated devices - only allocated
  * if iommu=pt passed on kernel cmd line.
@@ -140,6 +121,8 @@ struct iommu_dev_data {
 					     PPR completions */
 	u32 errata;			  /* Bitmap for errata to apply */
 	bool use_vapic;			  /* Enable device to use vapic mode */
+
+	struct ratelimit_state rs;	  /* Ratelimit IOPF messages */
 };
 
 /*
@@ -155,6 +138,20 @@ static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+	unsigned long iova_pfn;
+	unsigned long pages;
+	u64 counter; /* Flush counter when this entry was added to the queue */
+};
+
+struct flush_queue {
+	struct flush_queue_entry *entries;
+	unsigned head, tail;
+	spinlock_t lock;
+};
+
 /*
  * Data container for a dma_ops specific protection domain
  */
@@ -164,6 +161,36 @@ struct dma_ops_domain {
 
 	/* IOVA RB-Tree */
 	struct iova_domain iovad;
+
+	struct flush_queue __percpu *flush_queue;
+
+	/*
+	 * We need two counter here to be race-free wrt. IOTLB flushing and
+	 * adding entries to the flush queue.
+	 *
+	 * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
+	 * New entries added to the flush ring-buffer get their 'counter' value
+	 * from here. This way we can make sure that entries added to the queue
+	 * (or other per-cpu queues of the same domain) while the TLB is about
+	 * to be flushed are not considered to be flushed already.
+	 */
+	atomic64_t flush_start_cnt;
+
+	/*
+	 * The flush_finish_cnt is incremented when an IOTLB flush is complete.
+	 * This value is always smaller than flush_start_cnt. The queue_add
+	 * function frees all IOVAs that have a counter value smaller than
+	 * flush_finish_cnt. This makes sure that we only free IOVAs that are
+	 * flushed out of the IOTLB of the domain.
+	 */
+	atomic64_t flush_finish_cnt;
+
+	/*
+	 * Timer to make sure we don't keep IOVAs around unflushed
+	 * for too long
+	 */
+	struct timer_list flush_timer;
+	atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -255,6 +282,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
 	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
 
+	ratelimit_default_init(&dev_data->rs);
+
 	return dev_data;
 }
 
@@ -553,6 +582,29 @@ static void dump_command(unsigned long phys_addr)
 		pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
 }
 
+static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
+					u64 address, int flags)
+{
+	struct iommu_dev_data *dev_data = NULL;
+	struct pci_dev *pdev;
+
+	pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff);
+	if (pdev)
+		dev_data = get_dev_data(&pdev->dev);
+
+	if (dev_data && __ratelimit(&dev_data->rs)) {
+		dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			domain_id, address, flags);
+	} else if (printk_ratelimit()) {
+		pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			domain_id, address, flags);
+	}
+
+	if (pdev)
+		pci_dev_put(pdev);
+}
+
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
 	int type, devid, domid, flags;
@@ -577,7 +629,12 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		goto retry;
 	}
 
+	if (type == EVENT_TYPE_IO_FAULT) {
+		amd_iommu_report_page_fault(devid, domid, address, flags);
+		return;
+	} else {
 		printk(KERN_ERR "AMD-Vi: Event logged [");
+	}
 
 	switch (type) {
 	case EVENT_TYPE_ILL_DEV:
@@ -587,12 +644,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		       address, flags);
 		dump_dte_entry(devid);
 		break;
-	case EVENT_TYPE_IO_FAULT:
-		printk("IO_PAGE_FAULT device=%02x:%02x.%x "
-		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       domid, address, flags);
-		break;
 	case EVENT_TYPE_DEV_TAB_ERR:
 		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
 		       "address=0x%016llx flags=0x%04x]\n",
@@ -850,19 +901,20 @@ static int wait_on_sem(volatile u64 *sem)
 }
 
 static void copy_cmd_to_buffer(struct amd_iommu *iommu,
-			       struct iommu_cmd *cmd,
-			       u32 tail)
+			       struct iommu_cmd *cmd)
 {
 	u8 *target;
 
-	target = iommu->cmd_buf + tail;
-	tail   = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+	target = iommu->cmd_buf + iommu->cmd_buf_tail;
+
+	iommu->cmd_buf_tail += sizeof(*cmd);
+	iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
 
 	/* Copy command to buffer */
 	memcpy(target, cmd, sizeof(*cmd));
 
 	/* Tell the IOMMU about it */
-	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
@@ -1020,33 +1072,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 				      struct iommu_cmd *cmd,
 				      bool sync)
 {
-	u32 left, tail, head, next_tail;
+	unsigned int count = 0;
+	u32 left, next_tail;
 
+	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 again:
-
-	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-	next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
-	left      = (head - next_tail) % CMD_BUFFER_SIZE;
+	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
 
 	if (left <= 0x20) {
-		struct iommu_cmd sync_cmd;
-		int ret;
+		/* Skip udelay() the first time around */
+		if (count++) {
 
-		iommu->cmd_sem = 0;
+			if (count == LOOP_TIMEOUT) {
+				pr_err("AMD-Vi: Command buffer timeout\n");
+				return -EIO;
+			}
 
-		build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
-		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+			udelay(1);
+		}
 
-		if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
-			return ret;
+		/* Update head and recheck remaining space */
+		iommu->cmd_buf_head = readl(iommu->mmio_base +
+					    MMIO_CMD_HEAD_OFFSET);
 
 		goto again;
 	}
 
-	copy_cmd_to_buffer(iommu, cmd, tail);
+	copy_cmd_to_buffer(iommu, cmd);
 
-	/* We need to sync now to make sure all commands are processed */
+	/* Do we need to make sure all commands are processed? */
 	iommu->need_sync = sync;
 
 	return 0;
@@ -1735,6 +1788,180 @@ static void free_gcr3_table(struct protection_domain *domain)
 	free_page((unsigned long)domain->gcr3_tbl);
 }
 
+static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		kfree(queue->entries);
+	}
+
+	free_percpu(dom->flush_queue);
+
+	dom->flush_queue = NULL;
+}
+
+static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
+{
+	int cpu;
+
+	atomic64_set(&dom->flush_start_cnt,  0);
+	atomic64_set(&dom->flush_finish_cnt, 0);
+
+	dom->flush_queue = alloc_percpu(struct flush_queue);
+	if (!dom->flush_queue)
+		return -ENOMEM;
+
+	/* First make sure everything is cleared */
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		queue->head    = 0;
+		queue->tail    = 0;
+		queue->entries = NULL;
+	}
+
+	/* Now start doing the allocation */
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
+					 GFP_KERNEL);
+		if (!queue->entries) {
+			dma_ops_domain_free_flush_queue(dom);
+			return -ENOMEM;
+		}
+
+		spin_lock_init(&queue->lock);
+	}
+
+	return 0;
+}
+
+static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
+{
+	atomic64_inc(&dom->flush_start_cnt);
+	domain_flush_tlb(&dom->domain);
+	domain_flush_complete(&dom->domain);
+	atomic64_inc(&dom->flush_finish_cnt);
+}
+
+static inline bool queue_ring_full(struct flush_queue *queue)
+{
+	assert_spin_locked(&queue->lock);
+
+	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
+}
+
+#define queue_ring_for_each(i, q) \
+	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
+
+static inline unsigned queue_ring_add(struct flush_queue *queue)
+{
+	unsigned idx = queue->tail;
+
+	assert_spin_locked(&queue->lock);
+	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
+
+	return idx;
+}
+
+static inline void queue_ring_remove_head(struct flush_queue *queue)
+{
+	assert_spin_locked(&queue->lock);
+	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
+}
+
+static void queue_ring_free_flushed(struct dma_ops_domain *dom,
+				    struct flush_queue *queue)
+{
+	u64 counter = atomic64_read(&dom->flush_finish_cnt);
+	int idx;
+
+	queue_ring_for_each(idx, queue) {
+		/*
+		 * This assumes that counter values in the ring-buffer are
+		 * monotonously rising.
+		 */
+		if (queue->entries[idx].counter >= counter)
+			break;
+
+		free_iova_fast(&dom->iovad,
+			       queue->entries[idx].iova_pfn,
+			       queue->entries[idx].pages);
+
+		queue_ring_remove_head(queue);
+	}
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+		      unsigned long address, unsigned long pages)
+{
+	struct flush_queue *queue;
+	unsigned long flags;
+	int idx;
+
+	pages     = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
+
+	queue = get_cpu_ptr(dom->flush_queue);
+	spin_lock_irqsave(&queue->lock, flags);
+
+	/*
+	 * First remove the enries from the ring-buffer that are already
+	 * flushed to make the below queue_ring_full() check less likely
+	 */
+	queue_ring_free_flushed(dom, queue);
+
+	/*
+	 * When ring-queue is full, flush the entries from the IOTLB so
+	 * that we can free all entries with queue_ring_free_flushed()
+	 * below.
+	 */
+	if (queue_ring_full(queue)) {
+		dma_ops_domain_flush_tlb(dom);
+		queue_ring_free_flushed(dom, queue);
+	}
+
+	idx = queue_ring_add(queue);
+
+	queue->entries[idx].iova_pfn = address;
+	queue->entries[idx].pages    = pages;
+	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
+		mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
+
+	put_cpu_ptr(dom->flush_queue);
+}
+
+static void queue_flush_timeout(unsigned long data)
+{
+	struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
+	int cpu;
+
+	atomic_set(&dom->flush_timer_on, 0);
+
+	dma_ops_domain_flush_tlb(dom);
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+		unsigned long flags;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		spin_lock_irqsave(&queue->lock, flags);
+		queue_ring_free_flushed(dom, queue);
+		spin_unlock_irqrestore(&queue->lock, flags);
+	}
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1746,6 +1973,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	del_domain_from_list(&dom->domain);
 
+	if (timer_pending(&dom->flush_timer))
+		del_timer(&dom->flush_timer);
+
+	dma_ops_domain_free_flush_queue(dom);
+
 	put_iova_domain(&dom->iovad);
 
 	free_pagetable(&dom->domain);
@@ -1784,6 +2016,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	/* Initialize reserved ranges */
 	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
+	if (dma_ops_domain_alloc_flush_queue(dma_dom))
+		goto free_dma_dom;
+
+	setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
+		    (unsigned long)dma_dom);
+
+	atomic_set(&dma_dom->flush_timer_on, 0);
+
 	add_domain_to_list(&dma_dom->domain);
 
 	return dma_dom;
@@ -1846,7 +2086,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 		flags    |= tmp;
 	}
 
-	flags &= ~(0xffffUL);
+	flags &= ~(DTE_FLAG_SA | 0xffffULL);
 	flags |= domain->id;
 
 	amd_iommu_dev_table[devid].data[1]  = flags;
@@ -2227,92 +2468,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
-static void __queue_flush(struct flush_queue *queue)
-{
-	struct protection_domain *domain;
-	unsigned long flags;
-	int idx;
-
-	/* First flush TLB of all known domains */
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-	list_for_each_entry(domain, &amd_iommu_pd_list, list)
-		domain_flush_tlb(domain);
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-
-	/* Wait until flushes have completed */
-	domain_flush_complete(NULL);
-
-	for (idx = 0; idx < queue->next; ++idx) {
-		struct flush_queue_entry *entry;
-
-		entry = queue->entries + idx;
-
-		free_iova_fast(&entry->dma_dom->iovad,
-				entry->iova_pfn,
-				entry->pages);
-
-		/* Not really necessary, just to make sure we catch any bugs */
-		entry->dma_dom = NULL;
-	}
-
-	queue->next = 0;
-}
-
-static void queue_flush_all(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-		unsigned long flags;
-
-		queue = per_cpu_ptr(&flush_queue, cpu);
-		spin_lock_irqsave(&queue->lock, flags);
-		if (queue->next > 0)
-			__queue_flush(queue);
-		spin_unlock_irqrestore(&queue->lock, flags);
-	}
-}
-
-static void queue_flush_timeout(unsigned long unsused)
-{
-	atomic_set(&queue_timer_on, 0);
-	queue_flush_all();
-}
-
-static void queue_add(struct dma_ops_domain *dma_dom,
-		      unsigned long address, unsigned long pages)
-{
-	struct flush_queue_entry *entry;
-	struct flush_queue *queue;
-	unsigned long flags;
-	int idx;
-
-	pages     = __roundup_pow_of_two(pages);
-	address >>= PAGE_SHIFT;
-
-	queue = get_cpu_ptr(&flush_queue);
-	spin_lock_irqsave(&queue->lock, flags);
-
-	if (queue->next == FLUSH_QUEUE_SIZE)
-		__queue_flush(queue);
-
-	idx   = queue->next++;
-	entry = queue->entries + idx;
-
-	entry->iova_pfn = address;
-	entry->pages    = pages;
-	entry->dma_dom  = dma_dom;
-
-	spin_unlock_irqrestore(&queue->lock, flags);
-
-	if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
-		mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
-
-	put_cpu_ptr(&flush_queue);
-}
-
-
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2807,7 +2962,7 @@ static int init_reserved_iova_ranges(void)
 
 int __init amd_iommu_init_api(void)
 {
-	int ret, cpu, err = 0;
+	int ret, err = 0;
 
 	ret = iova_cache_get();
 	if (ret)
@@ -2817,18 +2972,6 @@
 	if (ret)
 		return ret;
 
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-		queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
-					 sizeof(*queue->entries),
-					 GFP_KERNEL);
-		if (!queue->entries)
-			goto out_put_iova;
-
-		spin_lock_init(&queue->lock);
-	}
-
 	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
 	if (err)
 		return err;
@@ -2840,23 +2983,12 @@
 	err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
 	if (err)
 		return err;
-	return 0;
 
-out_put_iova:
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-		kfree(queue->entries);
-	}
-
-	return -ENOMEM;
+	return 0;
 }
 
 int __init amd_iommu_init_dma_ops(void)
 {
-	setup_timer(&queue_timer, queue_flush_timeout, 0);
-	atomic_set(&queue_timer_on, 0);
-
 	swiotlb        = iommu_pass_through ? 1 : 0;
 	iommu_detected = 1;
 
@@ -3012,12 +3144,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
 
 	switch (dom->type) {
 	case IOMMU_DOMAIN_DMA:
-		/*
-		 * First make sure the domain is no longer referenced from the
-		 * flush queue
-		 */
-		queue_flush_all();
-
 		/* Now release the domain */
 		dma_dom = to_dma_ops_domain(domain);
 		dma_ops_domain_free(dma_dom);
@@ -4281,7 +4407,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
 					    irte_info->index);
 }
 
-static struct irq_domain_ops amd_ir_domain_ops = {
+static const struct irq_domain_ops amd_ir_domain_ops = {
 	.alloc = irq_remapping_alloc,
 	.free = irq_remapping_free,
 	.activate = irq_remapping_activate,
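
One effect of the command-queue changes above is that submission no longer reads the head and tail registers on every command: the driver keeps cached copies (cmd_buf_head/cmd_buf_tail) and only re-reads the hardware head pointer when the ring looks full. The user-space sketch below models that idea; the "registers" are plain variables standing in for readl()/writel(), and the sizes and the instant-drain assumption are illustrative only.

/* Hedged sketch: cached-index command ring, not the driver code itself. */
#include <stdio.h>
#include <string.h>

#define CMD_BUFFER_SIZE	64	/* must be a power of two for the % trick */
#define CMD_SIZE	16

static unsigned char cmd_buf[CMD_BUFFER_SIZE];
static unsigned int mmio_head, mmio_tail;	/* device-visible "registers" */
static unsigned int cached_head, cached_tail;	/* driver-side copies */
static unsigned long mmio_reads;

static unsigned int read_head_reg(void)	/* models readl(MMIO_CMD_HEAD_OFFSET) */
{
	mmio_reads++;
	return mmio_head;
}

static void queue_command(const void *cmd)
{
	unsigned int next_tail = (cached_tail + CMD_SIZE) % CMD_BUFFER_SIZE;

	while ((cached_head - next_tail) % CMD_BUFFER_SIZE <= CMD_SIZE) {
		/* Pretend the device consumed everything queued so far */
		mmio_head = mmio_tail;
		/* Ring looks full: only now pay for a head-register read */
		cached_head = read_head_reg();
	}

	memcpy(cmd_buf + cached_tail, cmd, CMD_SIZE);
	cached_tail = next_tail;
	mmio_tail = cached_tail;	/* models writel(tail, MMIO_CMD_TAIL_OFFSET) */
}

int main(void)
{
	unsigned char cmd[CMD_SIZE] = { 0 };

	for (int i = 0; i < 1000; i++)
		queue_command(cmd);
	/* Far fewer than the two register reads per command of the old path */
	printf("head register reads for 1000 commands: %lu\n", mmio_reads);
	return 0;
}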