Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 425902f5 authored by Jason Gunthorpe's avatar Jason Gunthorpe Committed by Greg Kroah-Hartman
Browse files

fpga zynq: Use the scatterlist interface



This allows the driver to avoid a high order coherent DMA allocation
and memory copy. With this patch it can DMA directly from the kernel
pages that the bitfile is stored in.

Since this is now a gather DMA operation the driver uses the ISR
to feed the chips DMA queue with each entry from the SGL.

Signed-off-by: default avatarJason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: default avatarMoritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent baa6d396
Loading
Loading
Loading
Loading
+135 −39
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#include <linux/pm.h>
#include <linux/regmap.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

/* Offsets into SLCR regmap */

@@ -80,6 +81,7 @@

/* FPGA init status */
#define STATUS_DMA_Q_F			BIT(31)
#define STATUS_DMA_Q_E			BIT(30)
#define STATUS_PCFG_INIT_MASK		BIT(4)

/* Interrupt Status/Mask Register Bit definitions */
@@ -98,12 +100,16 @@
#define DMA_INVALID_ADDRESS		GENMASK(31, 0)
/* Used to unlock the dev */
#define UNLOCK_MASK			0x757bdf0d
/* Timeout for DMA to complete */
#define DMA_DONE_TIMEOUT		msecs_to_jiffies(1000)
/* Timeout for polling reset bits */
#define INIT_POLL_TIMEOUT		2500000
/* Delay for polling reset bits */
#define INIT_POLL_DELAY			20
/* Signal this is the last DMA transfer, wait for the AXI and PCAP before
 * interrupting
 */
#define DMA_SRC_LAST_TRANSFER		1
/* Timeout for DMA completion */
#define DMA_TIMEOUT_MS			5000

/* Masks for controlling stuff in SLCR */
/* Disable all Level shifters */
@@ -124,6 +130,11 @@ struct zynq_fpga_priv {
	void __iomem *io_base;
	struct regmap *slcr;

	spinlock_t dma_lock;
	unsigned int dma_elm;
	unsigned int dma_nelms;
	struct scatterlist *cur_sg;

	struct completion dma_done;
};

@@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
	zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
}

/* Must be called with dma_lock held */
static void zynq_step_dma(struct zynq_fpga_priv *priv)
{
	u32 addr;
	u32 len;
	bool first;

	first = priv->dma_elm == 0;
	while (priv->cur_sg) {
		/* Feed the DMA queue until it is full. */
		if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
			break;

		addr = sg_dma_address(priv->cur_sg);
		len = sg_dma_len(priv->cur_sg);
		if (priv->dma_elm + 1 == priv->dma_nelms) {
			/* The last transfer waits for the PCAP to finish too,
			 * notice this also changes the irq_mask to ignore
			 * IXR_DMA_DONE_MASK which ensures we do not trigger
			 * the completion too early.
			 */
			addr |= DMA_SRC_LAST_TRANSFER;
			priv->cur_sg = NULL;
		} else {
			priv->cur_sg = sg_next(priv->cur_sg);
			priv->dma_elm++;
		}

		zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
		zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
		zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
		zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
	}

	/* Once the first transfer is queued we can turn on the ISR, future
	 * calls to zynq_step_dma will happen from the ISR context. The
	 * dma_lock spinlock guarentees this handover is done coherently, the
	 * ISR enable is put at the end to avoid another CPU spinning in the
	 * ISR on this lock.
	 */
	if (first && priv->cur_sg) {
		zynq_fpga_set_irq(priv,
				  IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
	} else if (!priv->cur_sg) {
		/* The last transfer changes to DMA & PCAP mode since we do
		 * not want to continue until everything has been flushed into
		 * the PCAP.
		 */
		zynq_fpga_set_irq(priv,
				  IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
	}
}

static irqreturn_t zynq_fpga_isr(int irq, void *data)
{
	struct zynq_fpga_priv *priv = data;
	u32 intr_status;

	/* disable DMA and error IRQs */
	zynq_fpga_set_irq(priv, 0);
	/* If anything other than DMA completion is reported stop and hand
	 * control back to zynq_fpga_ops_write, something went wrong,
	 * otherwise progress the DMA.
	 */
	spin_lock(&priv->dma_lock);
	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
	if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
	    (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
		zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
		zynq_step_dma(priv);
		spin_unlock(&priv->dma_lock);
		return IRQ_HANDLED;
	}
	spin_unlock(&priv->dma_lock);

	zynq_fpga_set_irq(priv, 0);
	complete(&priv->dma_done);

	return IRQ_HANDLED;
@@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
	zynq_fpga_write(priv, CTRL_OFFSET,
			(CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));

	/* check that we have room in the command queue */
	/* We expect that the command queue is empty right now. */
	status = zynq_fpga_read(priv, STATUS_OFFSET);
	if (status & STATUS_DMA_Q_F) {
		dev_err(&mgr->dev, "DMA command queue full\n");
	if ((status & STATUS_DMA_Q_F) ||
	    (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
		dev_err(&mgr->dev, "DMA command queue not right\n");
		err = -EBUSY;
		goto out_err;
	}
@@ -288,27 +367,36 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
	return err;
}

static int zynq_fpga_ops_write(struct fpga_manager *mgr,
			       const char *buf, size_t count)
static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
{
	struct zynq_fpga_priv *priv;
	const char *why;
	int err;
	char *kbuf;
	size_t in_count;
	dma_addr_t dma_addr;
	u32 transfer_length;
	u32 intr_status;
	unsigned long timeout;
	unsigned long flags;
	struct scatterlist *sg;
	int i;

	in_count = count;
	priv = mgr->priv;

	kbuf =
	    dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;
	/* The hardware can only DMA multiples of 4 bytes, and it requires the
	 * starting addresses to be aligned to 64 bits (UG585 pg 212).
	 */
	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		if ((sg->offset % 8) || (sg->length % 4)) {
			dev_err(&mgr->dev,
			    "Invalid bitstream, chunks must be aligned\n");
			return -EINVAL;
		}
	}

	memcpy(kbuf, buf, count);
	priv->dma_nelms =
	    dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
	if (priv->dma_nelms == 0) {
		dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
		return -ENOMEM;
	}

	/* enable clock */
	err = clk_enable(priv->clk);
@@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
		goto out_free;

	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);

	reinit_completion(&priv->dma_done);

	/* enable DMA and error IRQs */
	zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);

	/* the +1 in the src addr is used to hold off on DMA_DONE IRQ
	 * until both AXI and PCAP are done ...
	 */
	zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
	zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
	/* zynq_step_dma will turn on interrupts */
	spin_lock_irqsave(&priv->dma_lock, flags);
	priv->dma_elm = 0;
	priv->cur_sg = sgt->sgl;
	zynq_step_dma(priv);
	spin_unlock_irqrestore(&priv->dma_lock, flags);

	/* convert #bytes to #words */
	transfer_length = (count + 3) / 4;

	zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
	zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
	timeout = wait_for_completion_timeout(&priv->dma_done,
					      msecs_to_jiffies(DMA_TIMEOUT_MS));

	wait_for_completion(&priv->dma_done);
	spin_lock_irqsave(&priv->dma_lock, flags);
	zynq_fpga_set_irq(priv, 0);
	priv->cur_sg = NULL;
	spin_unlock_irqrestore(&priv->dma_lock, flags);

	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
	zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);

	/* There doesn't seem to be a way to force cancel any DMA, so if
	 * something went wrong we are relying on the hardware to have halted
	 * the DMA before we get here, if there was we could use
	 * wait_for_completion_interruptible too.
	 */

	if (intr_status & IXR_ERROR_FLAGS_MASK) {
		why = "DMA reported error";
@@ -345,7 +436,11 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
		goto out_report;
	}

	if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
	if (priv->cur_sg ||
	    !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
		if (timeout == 0)
			why = "DMA timed out";
		else
			why = "DMA did not complete";
		err = -EIO;
		goto out_report;
@@ -369,7 +464,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
	clk_disable(priv->clk);

out_free:
	dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
	dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
	return err;
}

@@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
	.initial_header_size = 128,
	.state = zynq_fpga_ops_state,
	.write_init = zynq_fpga_ops_write_init,
	.write = zynq_fpga_ops_write,
	.write_sg = zynq_fpga_ops_write,
	.write_complete = zynq_fpga_ops_write_complete,
};

@@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	spin_lock_init(&priv->dma_lock);

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	priv->io_base = devm_ioremap_resource(dev, res);