[PATCH] xen/arm: smmuv3: Add cache maintenance for non-coherent SMMU queues
According to the Arm SMMUv3 spec (ARM IHI 0070), a system may have SMMUs
that are non-coherent with the PE (processing element). In such cases,
memory accesses from the PE must either be non-cacheable or be augmented
with manual cache maintenance. SMMU cache coherency is reported by bit 4
(COHACC) of the SMMU_IDR0 register, and the check for it is already
present in the Xen driver.

However, the current implementation performs no cache maintenance on
memory shared between the PE and non-coherent SMMUs. It contains a
dmam_alloc_coherent() function, added during the porting of the Linux
driver, but this is actually a wrapper around _xzalloc(), which returns
normal write-back memory (fine only for coherent SMMUs).

During Xen bring-up on a system with non-coherent SMMUs, the driver did
not work properly: the SMMU was not functional and initialization halted
at the very beginning with a timeout while waiting for CMD_SYNC
completion:

(XEN) SMMUv3: /soc/iommu@fa000000: CMD_SYNC timeout
(XEN) SMMUv3: /soc/iommu@fa000000: CMD_SYNC timeout

To handle such scenarios properly, add a non_coherent flag to the
arm_smmu_queue struct. It is initialized from the features reported by
the SMMU hardware and is used to trigger cache clean/invalidate
operations. The flag is not queue-specific (it applies to the whole
SMMU), but keeping it in arm_smmu_queue avoids changing function
signatures and simplifies the patch (smmu->features, which contains the
required flag, is not available in the code paths that need cache
maintenance).

Signed-off-by: Dmytro Firsov <dmytro_firsov@xxxxxxxx>
---
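Note for reviewers: below is a minimal standalone sketch (not part of
the patch; struct demo_queue and the demo_* helpers are invented for
illustration) of the clean-on-write / invalidate-on-read pattern the
patch applies to the SMMU queues, using Xen's existing
clean_dcache_va_range()/invalidate_dcache_va_range() helpers.

/* Standalone sketch of the clean-on-write / invalidate-on-read pattern. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Xen's cache maintenance helpers, declared here only to keep the
 * sketch self-contained.
 */
int clean_dcache_va_range(const void *p, unsigned long size);
int invalidate_dcache_va_range(const void *p, unsigned long size);

/* Simplified stand-in for struct arm_smmu_queue. */
struct demo_queue {
	uint64_t *base;     /* queue memory shared with the SMMU */
	size_t ent_dwords;  /* 64-bit words per queue entry */
	bool non_coherent;  /* !(smmu->features & ARM_SMMU_FEAT_COHERENCY) */
};

/*
 * Producer side: write the entry, then clean the cache lines so the
 * non-snooping SMMU reads what the PE wrote, not stale memory.
 */
static void demo_push(struct demo_queue *q, unsigned int idx,
		      const uint64_t *ent)
{
	uint64_t *slot = q->base + (size_t)idx * q->ent_dwords;

	for (size_t i = 0; i < q->ent_dwords; i++)
		slot[i] = ent[i];

	if (q->non_coherent)
		clean_dcache_va_range(slot, q->ent_dwords * sizeof(*slot));
}

/*
 * Consumer side: invalidate first so the PE does not consume stale
 * cache lines, then read the entry the SMMU wrote to memory.
 */
static void demo_pop(struct demo_queue *q, unsigned int idx, uint64_t *ent)
{
	uint64_t *slot = q->base + (size_t)idx * q->ent_dwords;

	if (q->non_coherent)
		invalidate_dcache_va_range(slot, q->ent_dwords * sizeof(*slot));

	for (size_t i = 0; i < q->ent_dwords; i++)
		ent[i] = slot[i];
}

In the patch itself the same pattern lands in queue_insert_raw(), where
the PE is the producer (e.g. the command queue), and in
queue_remove_raw(), where the PE is the consumer (e.g. the event queue).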
 xen/drivers/passthrough/arm/smmu-v3.c | 27 +++++++++++++++++++++++----
 xen/drivers/passthrough/arm/smmu-v3.h |  7 +++++++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/xen/drivers/passthrough/arm/smmu-v3.c b/xen/drivers/passthrough/arm/smmu-v3.c
index 5e9e3e048e..bf153227db 100644
--- a/xen/drivers/passthrough/arm/smmu-v3.c
+++ b/xen/drivers/passthrough/arm/smmu-v3.c
@@ -346,10 +346,14 @@ static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 
 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
 {
+	__le64 *q_addr = Q_ENT(q, q->llq.prod);
+
 	if (queue_full(&q->llq))
 		return -ENOSPC;
 
-	queue_write(Q_ENT(q, q->llq.prod), ent, q->ent_dwords);
+	queue_write(q_addr, ent, q->ent_dwords);
+	if (q->non_coherent)
+		clean_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
 	queue_inc_prod(&q->llq);
 	queue_sync_prod_out(q);
 	return 0;
@@ -365,10 +369,15 @@ static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
 
 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 {
+	__le64 *q_addr = Q_ENT(q, q->llq.cons);
+
 	if (queue_empty(&q->llq))
 		return -EAGAIN;
 
-	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
+	if (q->non_coherent)
+		invalidate_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
+
+	queue_read(ent, q_addr, q->ent_dwords);
 	queue_inc_cons(&q->llq);
 	queue_sync_cons_out(q);
 	return 0;
@@ -463,6 +472,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	u32 cons = readl_relaxed(q->cons_reg);
 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
+	__le64 *q_addr = Q_ENT(q, cons);
 	struct arm_smmu_cmdq_ent cmd_sync = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 	};
@@ -489,11 +499,14 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 		break;
 	}
 
+	if (q->non_coherent)
+		invalidate_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
+
 	/*
 	 * We may have concurrent producers, so we need to be careful
 	 * not to touch any of the shadow cmdq state.
 	 */
-	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
+	queue_read(cmd, q_addr, q->ent_dwords);
 	dev_err(smmu->dev, "skipping command in error state:\n");
 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
@@ -504,7 +517,10 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 		return;
 	}
 
-	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
+	queue_write(q_addr, cmd, q->ent_dwords);
+
+	if (q->non_coherent)
+		clean_dcache_va_range(q_addr, q->ent_dwords * sizeof(*q_addr));
 }
 
 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
@@ -1634,6 +1650,9 @@ static int __init arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
 
 	q->llq.prod = q->llq.cons = 0;
+
+	q->non_coherent = !(smmu->features & ARM_SMMU_FEAT_COHERENCY);
+
 	return 0;
 }
 
diff --git a/xen/drivers/passthrough/arm/smmu-v3.h b/xen/drivers/passthrough/arm/smmu-v3.h
index f09048812c..db936b9bd4 100644
--- a/xen/drivers/passthrough/arm/smmu-v3.h
+++ b/xen/drivers/passthrough/arm/smmu-v3.h
@@ -522,6 +522,13 @@ struct arm_smmu_queue {
 
 	u32 __iomem *prod_reg;
 	u32 __iomem *cons_reg;
+
+	/*
+	 * According to SMMU spec section 3.16, some systems may have
+	 * SMMUs that are non-coherent with the PE (processing element).
+	 * In such cases manual cache maintenance is needed.
+	 */
+	bool non_coherent;
 };
 
 struct arm_smmu_cmdq {
-- 
2.50.1