Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0e3d5b21 authored by Vinod Koul's avatar Vinod Koul
Browse files

Merge branch 'topic/ioatdma' into for-linus

parents 8795d143 c997e30e
Loading
Loading
Loading
Loading
+72 −196
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/prefetch.h>
#include <linux/sizes.h>
#include "dma.h"
#include "registers.h"
#include "hw.h"
@@ -290,24 +291,30 @@ static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
}

static struct ioat_ring_ent *
ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
{
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	struct ioatdma_device *ioat_dma;
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
	int chunk;
	dma_addr_t phys;
	u8 *pos;
	off_t offs;

	ioat_dma = to_ioatdma_device(chan->device);
	hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
	if (!hw)
		return NULL;

	chunk = idx / IOAT_DESCS_PER_2M;
	idx &= (IOAT_DESCS_PER_2M - 1);
	offs = idx * IOAT_DESC_SZ;
	pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
	phys = ioat_chan->descs[chunk].hw + offs;
	hw = (struct ioat_dma_descriptor *)pos;
	memset(hw, 0, sizeof(*hw));

	desc = kmem_cache_zalloc(ioat_cache, flags);
	if (!desc) {
		pci_pool_free(ioat_dma->dma_pool, hw, phys);
	if (!desc)
		return NULL;
	}

	dma_async_tx_descriptor_init(&desc->txd, chan);
	desc->txd.tx_submit = ioat_tx_submit_unlock;
@@ -318,156 +325,79 @@ ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)

void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
{
	struct ioatdma_device *ioat_dma;

	ioat_dma = to_ioatdma_device(chan->device);
	pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
	kmem_cache_free(ioat_cache, desc);
}

struct ioat_ring_ent **
ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent **ring;
	int descs = 1 << order;
	int i;

	if (order > ioat_get_max_alloc_order())
		return NULL;
	int total_descs = 1 << order;
	int i, chunks;

	/* allocate the array to hold the software ring */
	ring = kcalloc(descs, sizeof(*ring), flags);
	ring = kcalloc(total_descs, sizeof(*ring), flags);
	if (!ring)
		return NULL;
	for (i = 0; i < descs; i++) {
		ring[i] = ioat_alloc_ring_ent(c, flags);
		if (!ring[i]) {
			while (i--)
				ioat_free_ring_ent(ring[i], c);
			kfree(ring);
			return NULL;
		}
		set_desc_id(ring[i], i);
	}

	/* link descs */
	for (i = 0; i < descs-1; i++) {
		struct ioat_ring_ent *next = ring[i+1];
		struct ioat_dma_descriptor *hw = ring[i]->hw;

		hw->next = next->txd.phys;
	}
	ring[i]->hw->next = ring[0]->txd.phys;

	return ring;
}

static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
{
	/* reshape differs from normal ring allocation in that we want
	 * to allocate a new software ring while only
	 * extending/truncating the hardware ring
	 */
	struct dma_chan *c = &ioat_chan->dma_chan;
	const u32 curr_size = ioat_ring_size(ioat_chan);
	const u16 active = ioat_ring_active(ioat_chan);
	const u32 new_size = 1 << order;
	struct ioat_ring_ent **ring;
	u32 i;

	if (order > ioat_get_max_alloc_order())
		return false;

	/* double check that we have at least 1 free descriptor */
	if (active == curr_size)
		return false;

	/* when shrinking, verify that we can hold the current active
	 * set in the new ring
	 */
	if (active >= new_size)
		return false;
	ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;

	/* allocate the array to hold the software ring */
	ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
	if (!ring)
		return false;
	for (i = 0; i < chunks; i++) {
		struct ioat_descs *descs = &ioat_chan->descs[i];

	/* allocate/trim descriptors as needed */
	if (new_size > curr_size) {
		/* copy current descriptors to the new ring */
		for (i = 0; i < curr_size; i++) {
			u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
		descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
						 SZ_2M, &descs->hw, flags);
		if (!descs->virt && (i > 0)) {
			int idx;

			ring[new_idx] = ioat_chan->ring[curr_idx];
			set_desc_id(ring[new_idx], new_idx);
			for (idx = 0; idx < i; idx++) {
				dma_free_coherent(to_dev(ioat_chan), SZ_2M,
						  descs->virt, descs->hw);
				descs->virt = NULL;
				descs->hw = 0;
			}

		/* add new descriptors to the ring */
		for (i = curr_size; i < new_size; i++) {
			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);

			ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
			if (!ring[new_idx]) {
				while (i--) {
					u16 new_idx = (ioat_chan->tail+i) &
						       (new_size-1);

					ioat_free_ring_ent(ring[new_idx], c);
				}
			ioat_chan->desc_chunks = 0;
			kfree(ring);
				return false;
			return NULL;
		}
			set_desc_id(ring[new_idx], new_idx);
	}

		/* hw link new descriptors */
		for (i = curr_size-1; i < new_size; i++) {
			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
			struct ioat_ring_ent *next =
				ring[(new_idx+1) & (new_size-1)];
			struct ioat_dma_descriptor *hw = ring[new_idx]->hw;

			hw->next = next->txd.phys;
		}
	} else {
		struct ioat_dma_descriptor *hw;
		struct ioat_ring_ent *next;
	for (i = 0; i < total_descs; i++) {
		ring[i] = ioat_alloc_ring_ent(c, i, flags);
		if (!ring[i]) {
			int idx;

		/* copy current descriptors to the new ring, dropping the
		 * removed descriptors
		 */
		for (i = 0; i < new_size; i++) {
			u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
			u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
			while (i--)
				ioat_free_ring_ent(ring[i], c);

			ring[new_idx] = ioat_chan->ring[curr_idx];
			set_desc_id(ring[new_idx], new_idx);
			for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
				dma_free_coherent(to_dev(ioat_chan),
						  SZ_2M,
						  ioat_chan->descs[idx].virt,
						  ioat_chan->descs[idx].hw);
				ioat_chan->descs[idx].virt = NULL;
				ioat_chan->descs[idx].hw = 0;
			}

		/* free deleted descriptors */
		for (i = new_size; i < curr_size; i++) {
			struct ioat_ring_ent *ent;

			ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
			ioat_free_ring_ent(ent, c);
			ioat_chan->desc_chunks = 0;
			kfree(ring);
			return NULL;
		}

		/* fix up hardware ring */
		hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
		next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
		hw->next = next->txd.phys;
		set_desc_id(ring[i], i);
	}

	dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
		__func__, new_size);
	/* link descs */
	for (i = 0; i < total_descs-1; i++) {
		struct ioat_ring_ent *next = ring[i+1];
		struct ioat_dma_descriptor *hw = ring[i]->hw;

	kfree(ioat_chan->ring);
	ioat_chan->ring = ring;
	ioat_chan->alloc_order = order;
		hw->next = next->txd.phys;
	}
	ring[i]->hw->next = ring[0]->txd.phys;

	return true;
	return ring;
}

/**
@@ -478,9 +408,6 @@ static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
	__acquires(&ioat_chan->prep_lock)
{
	bool retry;

 retry:
	spin_lock_bh(&ioat_chan->prep_lock);
	/* never allow the last descriptor to be consumed, we need at
	 * least one free at all times to allow for on-the-fly ring
@@ -493,24 +420,8 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
		ioat_chan->produce = num_descs;
		return 0;  /* with ioat->prep_lock held */
	}
	retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
	spin_unlock_bh(&ioat_chan->prep_lock);

	/* is another cpu already trying to expand the ring? */
	if (retry)
		goto retry;

	spin_lock_bh(&ioat_chan->cleanup_lock);
	spin_lock_bh(&ioat_chan->prep_lock);
	retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
	clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
	spin_unlock_bh(&ioat_chan->prep_lock);
	spin_unlock_bh(&ioat_chan->cleanup_lock);

	/* if we were able to expand the ring retry the allocation */
	if (retry)
		goto retry;

	dev_dbg_ratelimited(to_dev(ioat_chan),
			    "%s: ring full! num_descs: %d (%x:%x:%x)\n",
			    __func__, num_descs, ioat_chan->head,
@@ -823,19 +734,6 @@ static void check_active(struct ioatdma_chan *ioat_chan)

	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
	else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
		/* if the ring is idle, empty, and oversized try to step
		 * down the size
		 */
		reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat_chan->alloc_order > ioat_get_alloc_order())
			mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
	}

}

void ioat_timer_event(unsigned long data)
@@ -906,40 +804,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
	return dma_cookie_status(c, cookie, txstate);
}

static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
{
	struct pci_dev *pdev = ioat_dma->pdev;
	int irq = pdev->irq, i;

	if (!is_bwd_ioat(pdev))
		return 0;

	switch (ioat_dma->irq_mode) {
	case IOAT_MSIX:
		for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
			struct msix_entry *msix = &ioat_dma->msix_entries[i];
			struct ioatdma_chan *ioat_chan;

			ioat_chan = ioat_chan_by_index(ioat_dma, i);
			devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
		}

		pci_disable_msix(pdev);
		break;
	case IOAT_MSI:
		pci_disable_msi(pdev);
		/* fall through */
	case IOAT_INTX:
		devm_free_irq(&pdev->dev, irq, ioat_dma);
		break;
	default:
		return 0;
	}
	ioat_dma->irq_mode = IOAT_NOIRQ;

	return ioat_dma_setup_interrupts(ioat_dma);
}

int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
{
	/* throw away whatever the channel was doing and get it
@@ -979,9 +843,21 @@ int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
		}
	}

	if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
		ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
		ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
		ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
	}


	err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
	if (!err)
		err = ioat_irq_reinit(ioat_dma);
	if (!err) {
		if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
			writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
			writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
			writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
		}
	}

	if (err)
		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+15 −8
Original line number Diff line number Diff line
@@ -62,7 +62,6 @@ enum ioat_irq_mode {
 * struct ioatdma_device - internal representation of a IOAT device
 * @pdev: PCI-Express device
 * @reg_base: MMIO register space base address
 * @dma_pool: for allocating DMA descriptors
 * @completion_pool: DMA buffers for completion ops
 * @sed_hw_pool: DMA super descriptor pools
 * @dma_dev: embedded struct dma_device
@@ -76,8 +75,7 @@ enum ioat_irq_mode {
struct ioatdma_device {
	struct pci_dev *pdev;
	void __iomem *reg_base;
	struct pci_pool *dma_pool;
	struct pci_pool *completion_pool;
	struct dma_pool *completion_pool;
#define MAX_SED_POOLS	5
	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
	struct dma_device dma_dev;
@@ -88,6 +86,16 @@ struct ioatdma_device {
	struct dca_provider *dca;
	enum ioat_irq_mode irq_mode;
	u32 cap;

	/* shadow version for CB3.3 chan reset errata workaround */
	u64 msixtba0;
	u64 msixdata0;
	u32 msixpba;
};

struct ioat_descs {
	void *virt;
	dma_addr_t hw;
};

struct ioatdma_chan {
@@ -100,7 +108,6 @@ struct ioatdma_chan {
	#define IOAT_COMPLETION_ACK 1
	#define IOAT_RESET_PENDING 2
	#define IOAT_KOBJ_INIT_FAIL 3
	#define IOAT_RESHAPE_PENDING 4
	#define IOAT_RUN 5
	#define IOAT_CHAN_ACTIVE 6
	struct timer_list timer;
@@ -133,6 +140,8 @@ struct ioatdma_chan {
	u16 produce;
	struct ioat_ring_ent **ring;
	spinlock_t prep_lock;
	struct ioat_descs descs[2];
	int desc_chunks;
};

struct ioat_sysfs_entry {
@@ -302,10 +311,8 @@ static inline bool is_ioat_bug(unsigned long err)
}

#define IOAT_MAX_ORDER 16
#define ioat_get_alloc_order() \
	(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
#define ioat_get_max_alloc_order() \
	(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
#define IOAT_MAX_DESCS 65536
#define IOAT_DESCS_PER_2M 32768

static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
{
+2 −0
Original line number Diff line number Diff line
@@ -73,6 +73,8 @@

int system_has_dca_enabled(struct pci_dev *pdev);

#define IOAT_DESC_SZ	64

struct ioat_dma_descriptor {
	uint32_t	size;
	union {
+19 −30
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <linux/prefetch.h>
#include <linux/dca.h>
#include <linux/aer.h>
#include <linux/sizes.h>
#include "dma.h"
#include "registers.h"
#include "hw.h"
@@ -136,14 +137,6 @@ int ioat_pending_level = 4;
module_param(ioat_pending_level, int, 0644);
MODULE_PARM_DESC(ioat_pending_level,
		 "high-water mark for pushing ioat descriptors (default: 4)");
int ioat_ring_alloc_order = 8;
module_param(ioat_ring_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_alloc_order,
		 "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)");
int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
module_param(ioat_ring_max_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_max_alloc_order,
		 "ioat+: upper limit for ring size (default: 16)");
static char ioat_interrupt_style[32] = "msix";
module_param_string(ioat_interrupt_style, ioat_interrupt_style,
		    sizeof(ioat_interrupt_style), 0644);
@@ -504,23 +497,14 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
	struct pci_dev *pdev = ioat_dma->pdev;
	struct device *dev = &pdev->dev;

	/* DMA coherent memory pool for DMA descriptor allocations */
	ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev,
					     sizeof(struct ioat_dma_descriptor),
					     64, 0);
	if (!ioat_dma->dma_pool) {
		err = -ENOMEM;
		goto err_dma_pool;
	}

	ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev,
	ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
						    sizeof(u64),
						    SMP_CACHE_BYTES,
						    SMP_CACHE_BYTES);

	if (!ioat_dma->completion_pool) {
		err = -ENOMEM;
		goto err_completion_pool;
		goto err_out;
	}

	ioat_enumerate_channels(ioat_dma);
@@ -546,10 +530,8 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
err_self_test:
	ioat_disable_interrupts(ioat_dma);
err_setup_interrupts:
	pci_pool_destroy(ioat_dma->completion_pool);
err_completion_pool:
	pci_pool_destroy(ioat_dma->dma_pool);
err_dma_pool:
	dma_pool_destroy(ioat_dma->completion_pool);
err_out:
	return err;
}

@@ -559,8 +541,7 @@ static int ioat_register(struct ioatdma_device *ioat_dma)

	if (err) {
		ioat_disable_interrupts(ioat_dma);
		pci_pool_destroy(ioat_dma->completion_pool);
		pci_pool_destroy(ioat_dma->dma_pool);
		dma_pool_destroy(ioat_dma->completion_pool);
	}

	return err;
@@ -576,8 +557,7 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma)

	dma_async_device_unregister(dma);

	pci_pool_destroy(ioat_dma->dma_pool);
	pci_pool_destroy(ioat_dma->completion_pool);
	dma_pool_destroy(ioat_dma->completion_pool);

	INIT_LIST_HEAD(&dma->channels);
}
@@ -666,10 +646,19 @@ static void ioat_free_chan_resources(struct dma_chan *c)
		ioat_free_ring_ent(desc, c);
	}

	for (i = 0; i < ioat_chan->desc_chunks; i++) {
		dma_free_coherent(to_dev(ioat_chan), SZ_2M,
				  ioat_chan->descs[i].virt,
				  ioat_chan->descs[i].hw);
		ioat_chan->descs[i].virt = NULL;
		ioat_chan->descs[i].hw = 0;
	}
	ioat_chan->desc_chunks = 0;

	kfree(ioat_chan->ring);
	ioat_chan->ring = NULL;
	ioat_chan->alloc_order = 0;
	pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
	dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
		      ioat_chan->completion_dma);
	spin_unlock_bh(&ioat_chan->prep_lock);
	spin_unlock_bh(&ioat_chan->cleanup_lock);
@@ -701,7 +690,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	ioat_chan->completion =
		pci_pool_alloc(ioat_chan->ioat_dma->completion_pool,
		dma_pool_alloc(ioat_chan->ioat_dma->completion_pool,
			       GFP_KERNEL, &ioat_chan->completion_dma);
	if (!ioat_chan->completion)
		return -ENOMEM;
@@ -712,7 +701,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
	writel(((u64)ioat_chan->completion_dma) >> 32,
	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	order = ioat_get_alloc_order();
	order = IOAT_MAX_ORDER;
	ring = ioat_alloc_ring(c, order, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;