Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b3ca9af0 authored by VSR Burru, committed by David S. Miller
Browse files

liquidio: optimize DMA in NUMA systems



Optimize DMA in NUMA systems by allocating memory from the NUMA node that
the NIC is plugged into, so that DMA no longer crosses NUMA nodes.  If the
NIC's IRQs are pinned to a local CPU, that CPU's access to the DMA'd data
is also optimized.

Signed-off-by: VSR Burru <veerasenareddy.burru@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Signed-off-by: Satanand Burla <satananda.burla@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent de12e4fb
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -782,7 +782,7 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
	}

	for (i = 0; i < num_iqs; i++) {
		int numa_node = cpu_to_node(i % num_online_cpus());
		int numa_node = dev_to_node(&oct->pci_dev->dev);

		spin_lock_init(&lio->glist_lock[i]);

+2 −2
Original line number Diff line number Diff line
@@ -793,7 +793,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
	u32 num_descs = 0;
	u32 iq_no = 0;
	union oct_txpciq txpciq;
	int numa_node = cpu_to_node(iq_no % num_online_cpus());
	int numa_node = dev_to_node(&oct->pci_dev->dev);

	if (OCTEON_CN6XXX(oct))
		num_descs =
@@ -837,7 +837,7 @@ int octeon_setup_output_queues(struct octeon_device *oct)
	u32 num_descs = 0;
	u32 desc_size = 0;
	u32 oq_no = 0;
	int numa_node = cpu_to_node(oq_no % num_online_cpus());
	int numa_node = dev_to_node(&oct->pci_dev->dev);

	if (OCTEON_CN6XXX(oct)) {
		num_descs =
+2 −8
Original line number Diff line number Diff line
@@ -234,8 +234,7 @@ int octeon_init_droq(struct octeon_device *oct,
	struct octeon_droq *droq;
	u32 desc_ring_size = 0, c_num_descs = 0, c_buf_size = 0;
	u32 c_pkts_per_intr = 0, c_refill_threshold = 0;
	int orig_node = dev_to_node(&oct->pci_dev->dev);
	int numa_node = cpu_to_node(q_no % num_online_cpus());
	int numa_node = dev_to_node(&oct->pci_dev->dev);

	dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no);

@@ -275,11 +274,6 @@ int octeon_init_droq(struct octeon_device *oct,
	droq->buffer_size = c_buf_size;

	desc_ring_size = droq->max_count * OCT_DROQ_DESC_SIZE;
	set_dev_node(&oct->pci_dev->dev, numa_node);
	droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
					(dma_addr_t *)&droq->desc_ring_dma);
	set_dev_node(&oct->pci_dev->dev, orig_node);
	if (!droq->desc_ring)
	droq->desc_ring = lio_dma_alloc(oct, desc_ring_size,
					(dma_addr_t *)&droq->desc_ring_dma);

@@ -983,7 +977,7 @@ int octeon_create_droq(struct octeon_device *oct,
		       u32 desc_size, void *app_ctx)
{
	struct octeon_droq *droq;
	int numa_node = cpu_to_node(q_no % num_online_cpus());
	int numa_node = dev_to_node(&oct->pci_dev->dev);

	if (oct->droq[q_no]) {
		dev_dbg(&oct->pci_dev->dev, "Droq already in use. Cannot create droq %d again\n",
+1 −1
Original line number Diff line number Diff line
@@ -152,7 +152,7 @@ struct octeon_instr_queue {
	struct oct_iq_stats stats;

	/** DMA mapped base address of the input descriptor ring. */
	u64 base_addr_dma;
	dma_addr_t base_addr_dma;

	/** Application context */
	void *app_ctx;
+3 −10
Original line number Diff line number Diff line
@@ -62,8 +62,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
	u32 iq_no = (u32)txpciq.s.q_no;
	u32 q_size;
	struct cavium_wq *db_wq;
	int orig_node = dev_to_node(&oct->pci_dev->dev);
	int numa_node = cpu_to_node(iq_no % num_online_cpus());
	int numa_node = dev_to_node(&oct->pci_dev->dev);

	if (OCTEON_CN6XXX(oct))
		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
@@ -91,13 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,

	iq->oct_dev = oct;

	set_dev_node(&oct->pci_dev->dev, numa_node);
	iq->base_addr = lio_dma_alloc(oct, q_size,
				      (dma_addr_t *)&iq->base_addr_dma);
	set_dev_node(&oct->pci_dev->dev, orig_node);
	if (!iq->base_addr)
		iq->base_addr = lio_dma_alloc(oct, q_size,
					      (dma_addr_t *)&iq->base_addr_dma);
	iq->base_addr = lio_dma_alloc(oct, q_size, &iq->base_addr_dma);
	if (!iq->base_addr) {
		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
			iq_no);
@@ -211,7 +204,7 @@ int octeon_setup_iq(struct octeon_device *oct,
		    void *app_ctx)
{
	u32 iq_no = (u32)txpciq.s.q_no;
	int numa_node = cpu_to_node(iq_no % num_online_cpus());
	int numa_node = dev_to_node(&oct->pci_dev->dev);

	if (oct->instr_queue[iq_no]) {
		dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",