
[PATCH] xen/arm: smmuv3: Add cache maintenance for non-coherent SMMU queues


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Dmytro Firsov <Dmytro_Firsov@xxxxxxxx>
  • Date: Wed, 6 Aug 2025 14:58:55 +0000
  • Cc: Dmytro Firsov <Dmytro_Firsov@xxxxxxxx>, Bertrand Marquis <bertrand.marquis@xxxxxxx>, Rahul Singh <rahul.singh@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>
  • Delivery-date: Wed, 06 Aug 2025 14:59:14 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

According to the Arm SMMUv3 spec (ARM IHI 0070), a system may contain
SMMU(s) that are non-coherent with the PE (Processing Element). In such
cases, memory shared with the SMMU must either be mapped non-cacheable
by the PE or be maintained manually with cache clean/invalidate
operations. SMMU cache coherency is reported by bit 4 (COHACC) of the
SMMU_IDR0 register, and the Xen driver already reads it. However, the
current implementation performs no cache maintenance on memory shared
between the PE and a non-coherent SMMU: the dmam_alloc_coherent()
function inherited from the Linux driver port is really just a wrapper
around _xzalloc(), which returns normal write-back memory (fine only
for coherent SMMUs).
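
For reference, the coherency feature the new flag keys off is derived
from SMMU_IDR0 at probe time, roughly as follows (a condensed sketch
paraphrasing the driver's existing probe path, not the literal code;
IDR0_COHACC and ARM_SMMU_FEAT_COHERENCY are the driver's own names):

  /* Condensed sketch of the existing IDR0 probe. */
  u32 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

  /* COHACC (bit 4): the SMMU's memory accesses are I/O-coherent with the PE. */
  if (reg & IDR0_COHACC)
          smmu->features |= ARM_SMMU_FEAT_COHERENCY;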

During Xen bring-up on a system with non-coherent SMMUs, the driver did
not work: the SMMU was not functional and initialization halted at the
very beginning with a timeout while waiting for CMD_SYNC completion:

  (XEN) SMMUv3: /soc/iommu@fa000000: CMD_SYNC timeout
  (XEN) SMMUv3: /soc/iommu@fa000000: CMD_SYNC timeout

To handle such systems, add a non_coherent flag to the arm_smmu_queue
struct. It is initialized from the features reported by the SMMU
hardware and is used to trigger cache clean/invalidate operations.
The flag is not really queue-specific (it applies to the whole SMMU),
but keeping it in arm_smmu_queue avoids changing function signatures
and keeps the patch small (smmu->features, which contains the required
bit, is not available in the code paths that need cache maintenance).
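
The resulting discipline is the usual one for non-coherent DMA rings,
condensed here from the hunks below: clean after the CPU writes a queue
entry, so the SMMU fetches fresh data from memory, and invalidate
before the CPU reads an entry the SMMU may have produced, so stale
cache lines are discarded:

  /* Producer side (condensed from queue_insert_raw() below). */
  queue_write(q_addr, ent, q->ent_dwords);
  if (q->non_coherent)
          clean_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));

  /* Consumer side (condensed from queue_remove_raw() below). */
  if (q->non_coherent)
          invalidate_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
  queue_read(ent, q_addr, q->ent_dwords);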

Signed-off-by: Dmytro Firsov <dmytro_firsov@xxxxxxxx>
---
 xen/drivers/passthrough/arm/smmu-v3.c | 27 +++++++++++++++++++++++----
 xen/drivers/passthrough/arm/smmu-v3.h |  7 +++++++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/xen/drivers/passthrough/arm/smmu-v3.c b/xen/drivers/passthrough/arm/smmu-v3.c
index 5e9e3e048e..bf153227db 100644
--- a/xen/drivers/passthrough/arm/smmu-v3.c
+++ b/xen/drivers/passthrough/arm/smmu-v3.c
@@ -346,10 +346,14 @@ static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 
 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
 {
+       __le64 *q_addr = Q_ENT(q, q->llq.prod);
+
        if (queue_full(&q->llq))
                return -ENOSPC;
 
-       queue_write(Q_ENT(q, q->llq.prod), ent, q->ent_dwords);
+       queue_write(q_addr, ent, q->ent_dwords);
+       if (q->non_coherent)
+               clean_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
        queue_inc_prod(&q->llq);
        queue_sync_prod_out(q);
        return 0;
@@ -365,10 +369,15 @@ static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
 
 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 {
+       __le64 *q_addr = Q_ENT(q, q->llq.cons);
+
        if (queue_empty(&q->llq))
                return -EAGAIN;
 
-       queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
+       if (q->non_coherent)
+               invalidate_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
+
+       queue_read(ent, q_addr, q->ent_dwords);
        queue_inc_cons(&q->llq);
        queue_sync_cons_out(q);
        return 0;
@@ -463,6 +472,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
        struct arm_smmu_queue *q = &smmu->cmdq.q;
        u32 cons = readl_relaxed(q->cons_reg);
        u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
+       __le64 *q_addr = Q_ENT(q, cons);
        struct arm_smmu_cmdq_ent cmd_sync = {
                .opcode = CMDQ_OP_CMD_SYNC,
        };
@@ -489,11 +499,14 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
                break;
        }
 
+       if (q->non_coherent)
+               invalidate_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
+
        /*
         * We may have concurrent producers, so we need to be careful
         * not to touch any of the shadow cmdq state.
         */
-       queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
+       queue_read(cmd, q_addr, q->ent_dwords);
        dev_err(smmu->dev, "skipping command in error state:\n");
        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
@@ -504,7 +517,10 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
                return;
        }
 
-       queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
+       queue_write(q_addr, cmd, q->ent_dwords);
+
+       if (q->non_coherent)
+               clean_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
 }
 
 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
@@ -1634,6 +1650,9 @@ static int __init arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
        q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
 
        q->llq.prod = q->llq.cons = 0;
+
+       q->non_coherent = !(smmu->features & ARM_SMMU_FEAT_COHERENCY);
+
        return 0;
 }
 
diff --git a/xen/drivers/passthrough/arm/smmu-v3.h b/xen/drivers/passthrough/arm/smmu-v3.h
index f09048812c..db936b9bd4 100644
--- a/xen/drivers/passthrough/arm/smmu-v3.h
+++ b/xen/drivers/passthrough/arm/smmu-v3.h
@@ -522,6 +522,13 @@ struct arm_smmu_queue {
 
        u32 __iomem                     *prod_reg;
        u32 __iomem                     *cons_reg;
+
+       /*
+        * According to SMMU spec section 3.16, some systems may have
+        * SMMUs that are non-coherent with the PE (Processing Element).
+        * In such cases, manual cache maintenance is needed.
+        */
+       bool                            non_coherent;
 };
 
 struct arm_smmu_cmdq {
-- 
2.50.1
