drivers/dma/ioat/dma.c  +72 −196

@@ -31,6 +31,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/prefetch.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -290,24 +291,30 @@ static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 }

 static struct ioat_ring_ent *
-ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
 {
        struct ioat_dma_descriptor *hw;
        struct ioat_ring_ent *desc;
-       struct ioatdma_device *ioat_dma;
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+       int chunk;
        dma_addr_t phys;
+       u8 *pos;
+       off_t offs;

-       ioat_dma = to_ioatdma_device(chan->device);
-       hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
-       if (!hw)
-               return NULL;
+       chunk = idx / IOAT_DESCS_PER_2M;
+       idx &= (IOAT_DESCS_PER_2M - 1);
+       offs = idx * IOAT_DESC_SZ;
+       pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+       phys = ioat_chan->descs[chunk].hw + offs;
+       hw = (struct ioat_dma_descriptor *)pos;
        memset(hw, 0, sizeof(*hw));

        desc = kmem_cache_zalloc(ioat_cache, flags);
-       if (!desc) {
-               pci_pool_free(ioat_dma->dma_pool, hw, phys);
+       if (!desc)
                return NULL;
-       }

        dma_async_tx_descriptor_init(&desc->txd, chan);
        desc->txd.tx_submit = ioat_tx_submit_unlock;
@@ -318,156 +325,79 @@ ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)

 void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
 {
-       struct ioatdma_device *ioat_dma;
-
-       ioat_dma = to_ioatdma_device(chan->device);
-       pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
        kmem_cache_free(ioat_cache, desc);
 }

 struct ioat_ring_ent **
 ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 {
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioat_ring_ent **ring;
-       int descs = 1 << order;
-       int i;
-
-       if (order > ioat_get_max_alloc_order())
-               return NULL;
+       int total_descs = 1 << order;
+       int i, chunks;

        /* allocate the array to hold the software ring */
-       ring = kcalloc(descs, sizeof(*ring), flags);
+       ring = kcalloc(total_descs, sizeof(*ring), flags);
        if (!ring)
                return NULL;
-       for (i = 0; i < descs; i++) {
-               ring[i] = ioat_alloc_ring_ent(c, flags);
-               if (!ring[i]) {
-                       while (i--)
-                               ioat_free_ring_ent(ring[i], c);
-                       kfree(ring);
-                       return NULL;
-               }
-               set_desc_id(ring[i], i);
-       }
-
-       /* link descs */
-       for (i = 0; i < descs-1; i++) {
-               struct ioat_ring_ent *next = ring[i+1];
-               struct ioat_dma_descriptor *hw = ring[i]->hw;
-
-               hw->next = next->txd.phys;
-       }
-       ring[i]->hw->next = ring[0]->txd.phys;
+
+       ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+       for (i = 0; i < chunks; i++) {
+               struct ioat_descs *descs = &ioat_chan->descs[i];
+
+               descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+                                                SZ_2M, &descs->hw, flags);
+               if (!descs->virt && (i > 0)) {
+                       int idx;
+
+                       for (idx = 0; idx < i; idx++) {
+                               dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                                 descs->virt, descs->hw);
+                               descs->virt = NULL;
+                               descs->hw = 0;
+                       }
+
+                       ioat_chan->desc_chunks = 0;
+                       kfree(ring);
+                       return NULL;
+               }
+       }
+
+       for (i = 0; i < total_descs; i++) {
+               ring[i] = ioat_alloc_ring_ent(c, i, flags);
+               if (!ring[i]) {
+                       int idx;
+
+                       while (i--)
+                               ioat_free_ring_ent(ring[i], c);
+
+                       for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+                               dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                                 ioat_chan->descs[idx].virt,
+                                                 ioat_chan->descs[idx].hw);
+                               ioat_chan->descs[idx].virt = NULL;
+                               ioat_chan->descs[idx].hw = 0;
+                       }
+
+                       ioat_chan->desc_chunks = 0;
+                       kfree(ring);
+                       return NULL;
+               }
+               set_desc_id(ring[i], i);
+       }
+
+       /* link descs */
+       for (i = 0; i < total_descs-1; i++) {
+               struct ioat_ring_ent *next = ring[i+1];
+               struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+               hw->next = next->txd.phys;
+       }
+       ring[i]->hw->next = ring[0]->txd.phys;

        return ring;
 }
-
-static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
-{
-       /* reshape differs from normal ring allocation in that we want
-        * to allocate a new software ring while only
-        * extending/truncating the hardware ring
-        */
-       struct dma_chan *c = &ioat_chan->dma_chan;
-       const u32 curr_size = ioat_ring_size(ioat_chan);
-       const u16 active = ioat_ring_active(ioat_chan);
-       const u32 new_size = 1 << order;
-       struct ioat_ring_ent **ring;
-       u32 i;
-
-       if (order > ioat_get_max_alloc_order())
-               return false;
-
-       /* double check that we have at least 1 free descriptor */
-       if (active == curr_size)
-               return false;
-
-       /* when shrinking, verify that we can hold the current active
-        * set in the new ring
-        */
-       if (active >= new_size)
-               return false;
-
-       /* allocate the array to hold the software ring */
-       ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
-       if (!ring)
-               return false;
-
-       /* allocate/trim descriptors as needed */
-       if (new_size > curr_size) {
-               /* copy current descriptors to the new ring */
-               for (i = 0; i < curr_size; i++) {
-                       u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                       ring[new_idx] = ioat_chan->ring[curr_idx];
-                       set_desc_id(ring[new_idx], new_idx);
-               }
-
-               /* add new descriptors to the ring */
-               for (i = curr_size; i < new_size; i++) {
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                       ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
-                       if (!ring[new_idx]) {
-                               while (i--) {
-                                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                                       ioat_free_ring_ent(ring[new_idx], c);
-                               }
-                               kfree(ring);
-                               return false;
-                       }
-                       set_desc_id(ring[new_idx], new_idx);
-               }
-
-               /* hw link new descriptors */
-               for (i = curr_size-1; i < new_size; i++) {
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-                       struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
-                       struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
-
-                       hw->next = next->txd.phys;
-               }
-       } else {
-               struct ioat_dma_descriptor *hw;
-               struct ioat_ring_ent *next;
-
-               /* copy current descriptors to the new ring, dropping the
-                * removed descriptors
-                */
-               for (i = 0; i < new_size; i++) {
-                       u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
-                       ring[new_idx] = ioat_chan->ring[curr_idx];
-                       set_desc_id(ring[new_idx], new_idx);
-               }
-
-               /* free deleted descriptors */
-               for (i = new_size; i < curr_size; i++) {
-                       struct ioat_ring_ent *ent;
-
-                       ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
-                       ioat_free_ring_ent(ent, c);
-               }
-
-               /* fix up hardware ring */
-               hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
-               next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
-               hw->next = next->txd.phys;
-       }
-
-       dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
-               __func__, new_size);
-
-       kfree(ioat_chan->ring);
-       ioat_chan->ring = ring;
-       ioat_chan->alloc_order = order;
-
-       return true;
-}

 /**
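For reference, the index arithmetic in the new ioat_alloc_ring_ent() maps a ring index onto a 64-byte slot inside one of the per-channel 2 MB coherent chunks. A minimal user-space sketch of that mapping (hypothetical helper and made-up addresses, not part of the patch):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define IOAT_DESC_SZ      64                      /* bytes per hardware descriptor */
#define SZ_2M             (2 * 1024 * 1024)
#define IOAT_DESCS_PER_2M (SZ_2M / IOAT_DESC_SZ)  /* 32768 descriptors per chunk */

/* Hypothetical mirror of the driver's per-chunk bookkeeping. */
struct chunk {
	uint8_t  *virt; /* CPU address of the 2 MB block */
	uint64_t  hw;   /* DMA/bus address of the same block */
};

/* Same arithmetic ioat_alloc_ring_ent() now performs: pick the chunk,
 * then the 64-byte offset of the descriptor inside it. */
static void desc_addr(const struct chunk *chunks, int idx,
		      uint8_t **virt, uint64_t *hw)
{
	int chunk = idx / IOAT_DESCS_PER_2M;          /* which 2 MB block */
	size_t offs = (size_t)(idx & (IOAT_DESCS_PER_2M - 1)) * IOAT_DESC_SZ;

	*virt = chunks[chunk].virt + offs;
	*hw   = chunks[chunk].hw + offs;
}

int main(void)
{
	struct chunk chunks[2] = {
		{ (uint8_t *)0x1000000, 0x80000000ull },  /* made-up addresses */
		{ (uint8_t *)0x1200000, 0x80200000ull },
	};
	uint8_t *virt;
	uint64_t hw;

	/* Descriptor 40000 lands in chunk 1 at offset (40000 - 32768) * 64. */
	desc_addr(chunks, 40000, &virt, &hw);
	printf("virt=%p hw=%#llx\n", (void *)virt, (unsigned long long)hw);
	return 0;
}
```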
@@ -478,9 +408,6 @@ static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
 int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
        __acquires(&ioat_chan->prep_lock)
 {
-       bool retry;
-
- retry:
        spin_lock_bh(&ioat_chan->prep_lock);
        /* never allow the last descriptor to be consumed, we need at
         * least one free at all times to allow for on-the-fly ring
@@ -493,24 +420,8 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
                ioat_chan->produce = num_descs;
                return 0;  /* with ioat->prep_lock held */
        }
-       retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
        spin_unlock_bh(&ioat_chan->prep_lock);

-       /* is another cpu already trying to expand the ring? */
-       if (retry)
-               goto retry;
-
-       spin_lock_bh(&ioat_chan->cleanup_lock);
-       spin_lock_bh(&ioat_chan->prep_lock);
-       retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
-       clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
-       spin_unlock_bh(&ioat_chan->prep_lock);
-       spin_unlock_bh(&ioat_chan->cleanup_lock);
-
-       /* if we were able to expand the ring retry the allocation */
-       if (retry)
-               goto retry;
-
        dev_dbg_ratelimited(to_dev(ioat_chan),
                            "%s: ring full! num_descs: %d (%x:%x:%x)\n",
                            __func__, num_descs, ioat_chan->head,
@@ -823,19 +734,6 @@ static void check_active(struct ioatdma_chan *ioat_chan)
        if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
                mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-       else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
-               /* if the ring is idle, empty, and oversized try to step
-                * down the size
-                */
-               reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);
-
-               /* keep shrinking until we get back to our minimum
-                * default size
-                */
-               if (ioat_chan->alloc_order > ioat_get_alloc_order())
-                       mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-       }
-
 }

 void ioat_timer_event(unsigned long data)
@@ -906,40 +804,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
        return dma_cookie_status(c, cookie, txstate);
 }

-static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
-{
-       struct pci_dev *pdev = ioat_dma->pdev;
-       int irq = pdev->irq, i;
-
-       if (!is_bwd_ioat(pdev))
-               return 0;
-
-       switch (ioat_dma->irq_mode) {
-       case IOAT_MSIX:
-               for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
-                       struct msix_entry *msix = &ioat_dma->msix_entries[i];
-                       struct ioatdma_chan *ioat_chan;
-
-                       ioat_chan = ioat_chan_by_index(ioat_dma, i);
-                       devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
-               }
-
-               pci_disable_msix(pdev);
-               break;
-       case IOAT_MSI:
-               pci_disable_msi(pdev);
-               /* fall through */
-       case IOAT_INTX:
-               devm_free_irq(&pdev->dev, irq, ioat_dma);
-               break;
-       default:
-               return 0;
-       }
-
-       ioat_dma->irq_mode = IOAT_NOIRQ;
-
-       return ioat_dma_setup_interrupts(ioat_dma);
-}
-
 int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
 {
        /* throw away whatever the channel was doing and get it
@@ -979,9 +843,21 @@ int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
                }
        }

+       if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+               ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+               ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+               ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+       }
+
        err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
-       if (!err)
-               err = ioat_irq_reinit(ioat_dma);
+       if (!err) {
+               if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+                       writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+                       writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+                       writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+               }
+       }

        if (err)
                dev_err(&pdev->dev, "Failed to reset: %d\n", err);
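With reshape_ring() gone, ioat_check_space_lock() either returns 0 with prep_lock held or fails outright after the ratelimited "ring full" message; there is no grow-and-retry path anymore, so a prep call that cannot get enough free slots simply reports failure to dmaengine. A self-contained toy model of that behaviour (illustrative only; the driver's real prep routines are not part of this diff):

```c
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

#define RING_SIZE 16            /* stand-in for 1 << alloc_order */

struct ring {
	unsigned int head;      /* next slot to produce into */
	unsigned int tail;      /* last completed slot */
};

static unsigned int ring_active(const struct ring *r)
{
	return (r->head - r->tail) & (RING_SIZE - 1);
}

/* Mirrors ioat_check_space_lock(): succeed only while num_descs slots are
 * free and one slot always stays unused; otherwise fail, never reshape. */
static int check_space(struct ring *r, unsigned int num_descs)
{
	if (RING_SIZE - ring_active(r) > num_descs)
		return 0;
	return -ENOMEM;
}

/* Mirrors the shape of a prep routine: NULL tells the caller that no
 * descriptor could be allocated right now. */
static void *prep_memcpy(struct ring *r, unsigned int num_descs)
{
	if (check_space(r, num_descs) != 0)
		return NULL;
	r->head += num_descs;
	return &r->head;        /* stand-in for the descriptor */
}

int main(void)
{
	struct ring r = { .head = 14, .tail = 0 };

	printf("prep 1 desc: %s\n", prep_memcpy(&r, 1) ? "ok" : "ring full");
	printf("prep 4 descs: %s\n", prep_memcpy(&r, 4) ? "ok" : "ring full");
	return 0;
}
```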
drivers/dma/ioat/dma.h  +15 −8

@@ -62,7 +62,6 @@ enum ioat_irq_mode {
  * struct ioatdma_device - internal representation of a IOAT device
  * @pdev: PCI-Express device
  * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
  * @completion_pool: DMA buffers for completion ops
  * @sed_hw_pool: DMA super descriptor pools
  * @dma_dev: embedded struct dma_device
@@ -76,8 +75,7 @@ enum ioat_irq_mode {
 struct ioatdma_device {
        struct pci_dev *pdev;
        void __iomem *reg_base;
-       struct pci_pool *dma_pool;
-       struct pci_pool *completion_pool;
+       struct dma_pool *completion_pool;
 #define MAX_SED_POOLS  5
        struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
        struct dma_device dma_dev;
@@ -88,6 +86,16 @@ struct ioatdma_device {
        struct dca_provider *dca;
        enum ioat_irq_mode irq_mode;
        u32 cap;
+
+       /* shadow version for CB3.3 chan reset errata workaround */
+       u64 msixtba0;
+       u64 msixdata0;
+       u32 msixpba;
+};
+
+struct ioat_descs {
+       void *virt;
+       dma_addr_t hw;
 };

 struct ioatdma_chan {
@@ -100,7 +108,6 @@ struct ioatdma_chan {
        #define IOAT_COMPLETION_ACK 1
        #define IOAT_RESET_PENDING 2
        #define IOAT_KOBJ_INIT_FAIL 3
-       #define IOAT_RESHAPE_PENDING 4
        #define IOAT_RUN 5
        #define IOAT_CHAN_ACTIVE 6
        struct timer_list timer;
@@ -133,6 +140,8 @@ struct ioatdma_chan {
        u16 produce;
        struct ioat_ring_ent **ring;
        spinlock_t prep_lock;
+       struct ioat_descs descs[2];
+       int desc_chunks;
 };

 struct ioat_sysfs_entry {
@@ -302,10 +311,8 @@ static inline bool is_ioat_bug(unsigned long err)
 }

 #define IOAT_MAX_ORDER 16
-#define ioat_get_alloc_order() \
-       (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
-#define ioat_get_max_alloc_order() \
-       (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+#define IOAT_MAX_DESCS 65536
+#define IOAT_DESCS_PER_2M 32768

 static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
 {

drivers/dma/ioat/hw.h  +2 −0

@@ -73,6 +73,8 @@ int system_has_dca_enabled(struct pci_dev *pdev);

+#define IOAT_DESC_SZ   64
+
 struct ioat_dma_descriptor {
        uint32_t        size;
        union {
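The new constants fit together as follows: one 2 MB chunk holds SZ_2M / IOAT_DESC_SZ = 32768 descriptors, and the two ioat_descs slots in struct ioatdma_chan cover the full IOAT_MAX_DESCS = 1 << IOAT_MAX_ORDER = 65536 descriptor ring, i.e. 4 MB of descriptor memory per channel. A compile-time sanity check of that arithmetic (illustrative, not part of the patch):

```c
#include <assert.h>

#define SZ_2M             (2 * 1024 * 1024)
#define IOAT_DESC_SZ      64
#define IOAT_MAX_ORDER    16
#define IOAT_MAX_DESCS    65536
#define IOAT_DESCS_PER_2M 32768
#define IOAT_CHUNKS       2      /* descs[2] in struct ioatdma_chan */

/* Each 2 MB chunk holds exactly IOAT_DESCS_PER_2M 64-byte descriptors. */
static_assert(IOAT_DESCS_PER_2M == SZ_2M / IOAT_DESC_SZ,
	      "descriptors per 2 MB chunk");

/* The maximum ring order fits in the two statically reserved chunks. */
static_assert(IOAT_MAX_DESCS == 1 << IOAT_MAX_ORDER, "max ring size");
static_assert(IOAT_MAX_DESCS == IOAT_CHUNKS * IOAT_DESCS_PER_2M,
	      "chunks cover the maximum ring");

int main(void) { return 0; }
```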
drivers/dma/ioat/init.c  +19 −30

@@ -28,6 +28,7 @@
 #include <linux/prefetch.h>
 #include <linux/dca.h>
 #include <linux/aer.h>
+#include <linux/sizes.h>
 #include "dma.h"
 #include "registers.h"
 #include "hw.h"
@@ -136,14 +137,6 @@ int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
                 "high-water mark for pushing ioat descriptors (default: 4)");
-int ioat_ring_alloc_order = 8;
-module_param(ioat_ring_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_alloc_order,
-                "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)");
-int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
-module_param(ioat_ring_max_alloc_order, int, 0644);
-MODULE_PARM_DESC(ioat_ring_max_alloc_order,
-                "ioat+: upper limit for ring size (default: 16)");
-
 static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
                    sizeof(ioat_interrupt_style), 0644);
@@ -504,23 +497,14 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
        struct pci_dev *pdev = ioat_dma->pdev;
        struct device *dev = &pdev->dev;

-       /* DMA coherent memory pool for DMA descriptor allocations */
-       ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-                                            sizeof(struct ioat_dma_descriptor),
-                                            64, 0);
-       if (!ioat_dma->dma_pool) {
-               err = -ENOMEM;
-               goto err_dma_pool;
-       }
-
-       ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev,
+       ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
                                                    sizeof(u64),
                                                    SMP_CACHE_BYTES,
                                                    SMP_CACHE_BYTES);
        if (!ioat_dma->completion_pool) {
                err = -ENOMEM;
-               goto err_completion_pool;
+               goto err_out;
        }

        ioat_enumerate_channels(ioat_dma);
@@ -546,10 +530,8 @@ static int ioat_probe(struct ioatdma_device *ioat_dma)
 err_self_test:
        ioat_disable_interrupts(ioat_dma);
 err_setup_interrupts:
-       pci_pool_destroy(ioat_dma->completion_pool);
-err_completion_pool:
-       pci_pool_destroy(ioat_dma->dma_pool);
-err_dma_pool:
+       dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
        return err;
 }
@@ -559,8 +541,7 @@ static int ioat_register(struct ioatdma_device *ioat_dma)
        if (err) {
                ioat_disable_interrupts(ioat_dma);
-               pci_pool_destroy(ioat_dma->completion_pool);
-               pci_pool_destroy(ioat_dma->dma_pool);
+               dma_pool_destroy(ioat_dma->completion_pool);
        }

        return err;
@@ -576,8 +557,7 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma)

        dma_async_device_unregister(dma);

-       pci_pool_destroy(ioat_dma->dma_pool);
-       pci_pool_destroy(ioat_dma->completion_pool);
+       dma_pool_destroy(ioat_dma->completion_pool);

        INIT_LIST_HEAD(&dma->channels);
 }
@@ -666,10 +646,19 @@ static void ioat_free_chan_resources(struct dma_chan *c)
                ioat_free_ring_ent(desc, c);
        }

+       for (i = 0; i < ioat_chan->desc_chunks; i++) {
+               dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                 ioat_chan->descs[i].virt,
+                                 ioat_chan->descs[i].hw);
+               ioat_chan->descs[i].virt = NULL;
+               ioat_chan->descs[i].hw = 0;
+       }
+       ioat_chan->desc_chunks = 0;
+
        kfree(ioat_chan->ring);
        ioat_chan->ring = NULL;
        ioat_chan->alloc_order = 0;
-       pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+       dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
                      ioat_chan->completion_dma);
        spin_unlock_bh(&ioat_chan->prep_lock);
        spin_unlock_bh(&ioat_chan->cleanup_lock);
@@ -701,7 +690,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
        /* allocate a completion writeback area */
        /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
        ioat_chan->completion =
-               pci_pool_alloc(ioat_chan->ioat_dma->completion_pool,
+               dma_pool_alloc(ioat_chan->ioat_dma->completion_pool,
                               GFP_KERNEL, &ioat_chan->completion_dma);
        if (!ioat_chan->completion)
                return -ENOMEM;
@@ -712,7 +701,7 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
        writel(((u64)ioat_chan->completion_dma) >> 32,
               ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

-       order = ioat_get_alloc_order();
+       order = IOAT_MAX_ORDER;
        ring = ioat_alloc_ring(c, order, GFP_KERNEL);
        if (!ring)
                return -ENOMEM;
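The completion buffer now uses the generic dma_pool API directly; the old pci_pool_* calls were thin wrappers that expanded to dma_pool_* on &pdev->dev. A minimal kernel-style sketch of the same create/alloc/free/destroy sequence (hypothetical function name, not part of the patch):

```c
#include <linux/cache.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Hypothetical example: one cache-aligned u64 completion slot, mirroring
 * how init.c now uses completion_pool. */
static int example_completion_setup(struct device *dev)
{
	struct dma_pool *pool;
	u64 *slot;
	dma_addr_t slot_dma;

	pool = dma_pool_create("example_completion", dev, sizeof(u64),
			       SMP_CACHE_BYTES, SMP_CACHE_BYTES);
	if (!pool)
		return -ENOMEM;

	slot = dma_pool_alloc(pool, GFP_KERNEL, &slot_dma);
	if (!slot) {
		dma_pool_destroy(pool);
		return -ENOMEM;
	}

	/* ... program slot_dma into the device, poll *slot for updates ... */

	dma_pool_free(pool, slot, slot_dma);
	dma_pool_destroy(pool);
	return 0;
}
```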