dmaengine: ae4dma: Add AMD ae4dma controller driver
author	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
	Fri, 25 Oct 2024 09:59:27 +0000 (15:29 +0530)
committer	Vinod Koul <vkoul@kernel.org>
	Mon, 2 Dec 2024 17:25:27 +0000 (22:55 +0530)
Add support for the AMD AE4DMA controller. It performs high-bandwidth
memory-to-memory and IO copy operations. Device commands are managed
via a circular queue of 'descriptors', each of which specifies source
and destination addresses for copying a single buffer of data.
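As an illustration of what one such descriptor carries, here is a minimal
sketch of filling a ring slot for a single copy, based on the struct
ae4dma_desc introduced in ae4dma.h below (the helper name is hypothetical
and not part of this patch):

	/* Hypothetical helper: describe one src -> dst copy in a ring slot. */
	static void ae4_fill_desc(struct ae4dma_desc *d, dma_addr_t src,
				  dma_addr_t dst, u32 len)
	{
		memset(d, 0, sizeof(*d));
		d->length = len;                /* bytes to copy */
		d->src_lo = lower_32_bits(src); /* addresses are split into  */
		d->src_hi = upper_32_bits(src); /* low/high 32-bit halves    */
		d->dst_lo = lower_32_bits(dst);
		d->dst_hi = upper_32_bits(dst);
	}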

Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
Reviewed-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Link: https://lore.kernel.org/r/20241025095931.726018-3-Basavaraj.Natikar@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
MAINTAINERS
drivers/dma/amd/Kconfig
drivers/dma/amd/Makefile
drivers/dma/amd/ae4dma/Makefile [new file with mode: 0644]
drivers/dma/amd/ae4dma/ae4dma-dev.c [new file with mode: 0644]
drivers/dma/amd/ae4dma/ae4dma-pci.c [new file with mode: 0644]
drivers/dma/amd/ae4dma/ae4dma.h [new file with mode: 0644]
drivers/dma/amd/ptdma/ptdma-dmaengine.c

diff --git a/MAINTAINERS b/MAINTAINERS
index b01420aa576de6e6428d386ac6f127c0b0626544..ae633a697a4e4a3366c6669a2d36e9b1776173a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -980,6 +980,12 @@ L: linux-edac@vger.kernel.org
 S:     Supported
 F:     drivers/ras/amd/atl/*
 
+AMD AE4DMA DRIVER
+M:     Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+L:     dmaengine@vger.kernel.org
+S:     Supported
+F:     drivers/dma/amd/ae4dma/
+
 AMD AXI W1 DRIVER
 M:     Kris Chaplin <kris.chaplin@amd.com>
 R:     Thomas Delev <thomas.delev@amd.com>
diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
index a09517d51449001517efbd89e8d12e43c4ceaf00..00d874872a8f8b8503245a16d623f42176a831a4 100644
--- a/drivers/dma/amd/Kconfig
+++ b/drivers/dma/amd/Kconfig
@@ -1,5 +1,20 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
+
+config AMD_AE4DMA
+       tristate  "AMD AE4DMA Engine"
+       depends on (X86_64 || COMPILE_TEST) && PCI
+       depends on AMD_PTDMA
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       help
+         Enable support for the AMD AE4DMA controller. This controller
+         provides DMA capabilities to perform high bandwidth memory to
+         memory and IO copy operations. It performs DMA transfer through
+         queue-based descriptor management. This DMA controller is intended
+         to be used with AMD Non-Transparent Bridge devices and not for
+         general purpose peripheral DMA.
+
 config AMD_PTDMA
        tristate  "AMD PassThru DMA Engine"
        depends on X86_64 && PCI
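For context on the Kconfig entry above: the driver plugs into the kernel's
dmaengine framework (hence the DMA_ENGINE/DMA_VIRTUAL_CHANNELS selects). A
minimal sketch of a generic dmaengine memcpy client that could exercise such
a channel follows; this is the standard dmaengine client API, not code from
this patch:

	#include <linux/dmaengine.h>

	/* Request any DMA_MEMCPY-capable channel and run one copy. */
	static int demo_memcpy(dma_addr_t src, dma_addr_t dst, size_t len)
	{
		struct dma_async_tx_descriptor *tx;
		struct dma_chan *chan;
		dma_cap_mask_t mask;
		dma_cookie_t cookie;

		dma_cap_zero(mask);
		dma_cap_set(DMA_MEMCPY, mask);
		chan = dma_request_channel(mask, NULL, NULL);
		if (!chan)
			return -ENODEV;

		tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
					       DMA_PREP_INTERRUPT);
		if (!tx) {
			dma_release_channel(chan);
			return -EIO;
		}

		cookie = dmaengine_submit(tx);
		dma_async_issue_pending(chan);
		dma_sync_wait(chan, cookie);	/* blocking wait, demo only */
		dma_release_channel(chan);
		return 0;
	}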
diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
index fb12f2f9e7b750fcecd2cf6eff0cc920fd28ae1d..11278c06374d4442995425e6ce34d3b85249506b 100644
--- a/drivers/dma/amd/Makefile
+++ b/drivers/dma/amd/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
 obj-$(CONFIG_AMD_PTDMA) += ptdma/
 obj-$(CONFIG_AMD_QDMA) += qdma/
diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile
new file mode 100644
index 0000000..e918f85
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# AMD AE4DMA driver
+#
+
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
+
+ae4dma-objs := ae4dma-dev.o
+
+ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c
new file mode 100644
index 0000000..7cbef9e
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static unsigned int max_hw_q = 1;
+module_param(max_hw_q, uint, 0444);
+MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
+
+static void ae4_pending_work(struct work_struct *work)
+{
+       struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
+       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+       struct pt_cmd *cmd;
+       u32 cridx;
+
+       for (;;) {
+               wait_event_interruptible(ae4cmd_q->q_w,
+                                        ((atomic64_read(&ae4cmd_q->done_cnt)) <
+                                          atomic64_read(&ae4cmd_q->intr_cnt)));
+
+               atomic64_inc(&ae4cmd_q->done_cnt);
+
+               mutex_lock(&ae4cmd_q->cmd_lock);
+               cridx = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
+               while ((ae4cmd_q->dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
+                       cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
+                       list_del(&cmd->entry);
+
+                       ae4_check_status_error(ae4cmd_q, ae4cmd_q->dridx);
+                       cmd->pt_cmd_callback(cmd->data, cmd->ret);
+
+                       ae4cmd_q->q_cmd_count--;
+                       ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN;
+
+                       complete_all(&ae4cmd_q->cmp);
+               }
+               mutex_unlock(&ae4cmd_q->cmd_lock);
+       }
+}
+
+static irqreturn_t ae4_core_irq_handler(int irq, void *data)
+{
+       struct ae4_cmd_queue *ae4cmd_q = data;
+       struct pt_cmd_queue *cmd_q;
+       struct pt_device *pt;
+       u32 status;
+
+       cmd_q = &ae4cmd_q->cmd_q;
+       pt = cmd_q->pt;
+
+       pt->total_interrupts++;
+       atomic64_inc(&ae4cmd_q->intr_cnt);
+
+       status = readl(cmd_q->reg_control + AE4_INTR_STS_OFF);
+       if (status & BIT(0)) {
+               status &= GENMASK(31, 1);
+               writel(status, cmd_q->reg_control + AE4_INTR_STS_OFF);
+       }
+
+       wake_up(&ae4cmd_q->q_w);
+
+       return IRQ_HANDLED;
+}
+
+void ae4_destroy_work(struct ae4_device *ae4)
+{
+       struct ae4_cmd_queue *ae4cmd_q;
+       int i;
+
+       for (i = 0; i < ae4->cmd_q_count; i++) {
+               ae4cmd_q = &ae4->ae4cmd_q[i];
+
+               if (!ae4cmd_q->pws)
+                       break;
+
+               cancel_delayed_work_sync(&ae4cmd_q->p_work);
+               destroy_workqueue(ae4cmd_q->pws);
+       }
+}
+
+int ae4_core_init(struct ae4_device *ae4)
+{
+       struct pt_device *pt = &ae4->pt;
+       struct ae4_cmd_queue *ae4cmd_q;
+       struct device *dev = pt->dev;
+       struct pt_cmd_queue *cmd_q;
+       int i, ret = 0;
+
+       writel(max_hw_q, pt->io_regs);
+
+       for (i = 0; i < max_hw_q; i++) {
+               ae4cmd_q = &ae4->ae4cmd_q[i];
+               ae4cmd_q->id = ae4->cmd_q_count;
+               ae4->cmd_q_count++;
+
+               cmd_q = &ae4cmd_q->cmd_q;
+               cmd_q->pt = pt;
+
+               cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
+
+               ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
+                                      dev_name(pt->dev), ae4cmd_q);
+               if (ret)
+                       return ret;
+
+               cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
+
+               cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
+                                                  GFP_KERNEL);
+               if (!cmd_q->qbase)
+                       return -ENOMEM;
+       }
+
+       for (i = 0; i < ae4->cmd_q_count; i++) {
+               ae4cmd_q = &ae4->ae4cmd_q[i];
+
+               cmd_q = &ae4cmd_q->cmd_q;
+
+               cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
+
+               /* Update the device registers with queue information. */
+               writel(CMD_Q_LEN, cmd_q->reg_control + AE4_MAX_IDX_OFF);
+
+               cmd_q->qdma_tail = cmd_q->qbase_dma;
+               writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_L_OFF);
+               writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_H_OFF);
+
+               INIT_LIST_HEAD(&ae4cmd_q->cmd);
+               init_waitqueue_head(&ae4cmd_q->q_w);
+
+               ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
+               if (!ae4cmd_q->pws) {
+                       ae4_destroy_work(ae4);
+                       return -ENOMEM;
+               }
+               INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
+               queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work,  usecs_to_jiffies(100));
+
+               init_completion(&ae4cmd_q->cmp);
+       }
+
+       return ret;
+}
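Note that ae4dma-dev.c above only dequeues completions: ae4_pending_work()
walks the ring up to the hardware read index. The matching submission path
is not part of this patch; as a sketch of the write-index protocol implied
here (the helper name is hypothetical, and the indexing mirrors what
ae4_check_status_error() does later in this series):

	/* Hypothetical enqueue: publish one descriptor, advance WR_IDX. */
	static void ae4_enqueue(struct ae4_cmd_queue *ae4cmd_q,
				struct ae4dma_desc *d)
	{
		struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
		u32 widx = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);

		memcpy(&cmd_q->qbase[widx], d, sizeof(*d));
		widx = (widx + 1) % CMD_Q_LEN;	/* same wrap as dridx */
		writel(widx, cmd_q->reg_control + AE4_WR_IDX_OFF);
	}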
diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
new file mode 100644
index 0000000..43d36e9
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static int ae4_get_irqs(struct ae4_device *ae4)
+{
+       struct ae4_msix *ae4_msix = ae4->ae4_msix;
+       struct pt_device *pt = &ae4->pt;
+       struct device *dev = pt->dev;
+       struct pci_dev *pdev;
+       int i, v, ret;
+
+       pdev = to_pci_dev(dev);
+
+       for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
+               ae4_msix->msix_entry[v].entry = v;
+
+       ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX);
+       if (ret != v) {
+               if (ret > 0)
+                       pci_free_irq_vectors(pdev);
+
+               dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+               ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+               if (ret < 0) {
+                       dev_err(dev, "could not enable MSI (%d)\n", ret);
+                       return ret;
+               }
+
+               ret = pci_irq_vector(pdev, 0);
+               if (ret < 0) {
+                       pci_free_irq_vectors(pdev);
+                       return ret;
+               }
+
+               for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+                       ae4->ae4_irq[i] = ret;
+
+       } else {
+               ae4_msix->msix_count = ret;
+               for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+                       ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
+       }
+
+       return ret;
+}
+
+static void ae4_free_irqs(struct ae4_device *ae4)
+{
+       struct ae4_msix *ae4_msix = ae4->ae4_msix;
+       struct pt_device *pt = &ae4->pt;
+       struct device *dev = pt->dev;
+       struct pci_dev *pdev;
+
+       pdev = to_pci_dev(dev);
+
+       if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
+               pci_free_irq_vectors(pdev);
+}
+
+static void ae4_deinit(struct ae4_device *ae4)
+{
+       ae4_free_irqs(ae4);
+}
+
+static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct device *dev = &pdev->dev;
+       struct ae4_device *ae4;
+       struct pt_device *pt;
+       int bar_mask;
+       int ret = 0;
+
+       ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
+       if (!ae4)
+               return -ENOMEM;
+
+       ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
+       if (!ae4->ae4_msix)
+               return -ENOMEM;
+
+       ret = pcim_enable_device(pdev);
+       if (ret)
+               goto ae4_error;
+
+       bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+       ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
+       if (ret)
+               goto ae4_error;
+
+       pt = &ae4->pt;
+       pt->dev = dev;
+
+       pt->io_regs = pcim_iomap_table(pdev)[0];
+       if (!pt->io_regs) {
+               ret = -ENOMEM;
+               goto ae4_error;
+       }
+
+       ret = ae4_get_irqs(ae4);
+       if (ret < 0)
+               goto ae4_error;
+
+       pci_set_master(pdev);
+
+       dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+
+       dev_set_drvdata(dev, ae4);
+
+       ret = ae4_core_init(ae4);
+       if (ret)
+               goto ae4_error;
+
+       return 0;
+
+ae4_error:
+       ae4_deinit(ae4);
+
+       return ret;
+}
+
+static void ae4_pci_remove(struct pci_dev *pdev)
+{
+       struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
+
+       ae4_destroy_work(ae4);
+       ae4_deinit(ae4);
+}
+
+static const struct pci_device_id ae4_pci_table[] = {
+       { PCI_VDEVICE(AMD, 0x14C8), },
+       { PCI_VDEVICE(AMD, 0x14DC), },
+       { PCI_VDEVICE(AMD, 0x149B), },
+       /* Last entry must be zero */
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ae4_pci_table);
+
+static struct pci_driver ae4_pci_driver = {
+       .name = "ae4dma",
+       .id_table = ae4_pci_table,
+       .probe = ae4_pci_probe,
+       .remove = ae4_pci_remove,
+};
+
+module_pci_driver(ae4_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD AE4DMA driver");
diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
new file mode 100644
index 0000000..4a1dfcf
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+#ifndef __AE4DMA_H__
+#define __AE4DMA_H__
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include "../ptdma/ptdma.h"
+#include "../../virt-dma.h"
+
+#define MAX_AE4_HW_QUEUES              16
+
+#define AE4_DESC_COMPLETED             0x03
+
+#define AE4_MAX_IDX_OFF                        0x08
+#define AE4_RD_IDX_OFF                 0x0c
+#define AE4_WR_IDX_OFF                 0x10
+#define AE4_INTR_STS_OFF               0x14
+#define AE4_Q_BASE_L_OFF               0x18
+#define AE4_Q_BASE_H_OFF               0x1c
+#define AE4_Q_SZ                       0x20
+
+struct ae4_msix {
+       int msix_count;
+       struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
+};
+
+struct ae4_cmd_queue {
+       struct ae4_device *ae4;
+       struct pt_cmd_queue cmd_q;
+       struct list_head cmd;
+       /* protect command operations */
+       struct mutex cmd_lock;
+       struct delayed_work p_work;
+       struct workqueue_struct *pws;
+       struct completion cmp;
+       wait_queue_head_t q_w;
+       atomic64_t intr_cnt;
+       atomic64_t done_cnt;
+       u64 q_cmd_count;
+       u32 dridx;
+       u32 id;
+};
+
+union dwou {
+       u32 dw0;
+       struct dword0 {
+       u8      byte0;
+       u8      byte1;
+       u16     timestamp;
+       } dws;
+};
+
+struct dword1 {
+       u8      status;
+       u8      err_code;
+       u16     desc_id;
+};
+
+struct ae4dma_desc {
+       union dwou dwouv;
+       struct dword1 dw1;
+       u32 length;
+       u32 rsvd;
+       u32 src_hi;
+       u32 src_lo;
+       u32 dst_hi;
+       u32 dst_lo;
+};
+
+struct ae4_device {
+       struct pt_device pt;
+       struct ae4_msix *ae4_msix;
+       struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
+       unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
+       unsigned int cmd_q_count;
+};
+
+int ae4_core_init(struct ae4_device *ae4);
+void ae4_destroy_work(struct ae4_device *ae4);
+void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx);
+#endif
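The register offsets above are relative to a 0x20-byte (AE4_Q_SZ) block per
queue; ae4_core_init() maps queue i at io_regs + (i + 1) * AE4_Q_SZ, with
block 0 holding the global registers. A small sketch of how these pieces
combine into a queue-occupancy check (helper names are hypothetical;
CMD_Q_LEN comes from the shared ptdma.h header):

	/* Register block for hardware queue i (block 0 is global). */
	static void __iomem *ae4_queue_regs(void __iomem *io_regs,
					    unsigned int i)
	{
		return io_regs + (i + 1) * AE4_Q_SZ;
	}

	/* Entries currently pending between the write and read index. */
	static u32 ae4_queue_depth(void __iomem *q)
	{
		u32 rd = readl(q + AE4_RD_IDX_OFF);
		u32 wr = readl(q + AE4_WR_IDX_OFF);

		return (wr - rd + CMD_Q_LEN) % CMD_Q_LEN;
	}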
diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
index a2e7c2cec15efd6584d11e85c54add1b1e0273c8..77fe709fb32782a071208bf6179153a468799f22 100644
--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
  */
 
 #include "ptdma.h"
+#include "../ae4dma/ae4dma.h"
 #include "../../dmaengine.h"
 
+static char *ae4_error_codes[] = {
+       "",
+       "ERR 01: INVALID HEADER DW0",
+       "ERR 02: INVALID STATUS",
+       "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
+       "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
+       "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
+       "ERR 06: INVALID ALIGNMENT",
+       "ERR 07: INVALID DESCRIPTOR",
+};
+
+static void ae4_log_error(struct pt_device *d, int e)
+{
+       /* ERR 01 - 07 represents Invalid AE4 errors */
+       if (e <= 7)
+               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
+       /* ERR 08 - 15 represents Invalid Descriptor errors */
+       else if (e > 7 && e <= 15)
+               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+       /* ERR 16 - 31 represents Firmware errors */
+       else if (e > 15 && e <= 31)
+               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FIRMWARE ERROR", e);
+       /* ERR 32 - 63 represents Fatal errors */
+       else if (e > 31 && e <= 63)
+               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FATAL ERROR", e);
+       /* ERR 64 - 255 represents PTE errors */
+       else if (e > 63 && e <= 255)
+               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
+       else
+               dev_info(d->dev, "Unknown AE4DMA error");
+}
+
+void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
+{
+       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+       struct ae4dma_desc desc;
+       u8 status;
+
+       memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
+       status = desc.dw1.status;
+       if (status && status != AE4_DESC_COMPLETED) {
+               cmd_q->cmd_error = desc.dw1.err_code;
+               if (cmd_q->cmd_error)
+                       ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
+       }
+}
+EXPORT_SYMBOL_GPL(ae4_check_status_error);
+
 static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
 {
        return container_of(dma_chan, struct pt_dma_chan, vc.chan);