[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 2/4] VT-d / x86: re-arrange cache syncing


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 27 Jan 2022 15:48:26 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=none; dmarc=none; dkim=none; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=9uRmQeQ2GIyo2OW1VQw24UUZo9Bo39U2yqak3SzTI9o=; b=B2QbSkVWPjNwseaykb2WLX+lrl7PXgLs65Ed+OJGBECu7hAhiZwBXd31dxw4DAR9aWDpbRjuoBV5bQiJwn6OfON+jW7U2eDXvfzKHA/XNusD55I9sLp9sVA0QkyxeHZ1OjcixkaV+G/Z0Ojz+QHyRcZ2WY6qaWfQOa2VXz+/QsyTnyQiGXEwH/b20cbxqag/TlUtIcTw36byd+wS9KZcbg3YbNoyxL3zAzwtkS0lT4oQcN3lzJrFTssJAtH6FPONBD/wEZLM9HsvNfsR9HJPyNafwhELsEhraYjDUwBO4qn7KRt9ES061jYX02lPmg/ifK3DU7x8VsBSolkfdZ1SlQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=Iaf2ZAEFiph+lboTFgT+gQj387kmVuAt6F1xsUZrzsUYxupz+2c8jmeSs9nYwMNIrCDXpeetjQyc1ZeYmCFX8Rf1JeAq3Jf1J/0q2VUBbsTFolvTSR024Ez98XSXl3JynoNcWxkkz+J/P1lDpGU1Zd+tlQuUXi/yH4TN6w3EAIMR4p+IsORYaHNivg1ufQdmXmwLL00gjl5pt6UUEzuQwrCDf3NAbpRNgnmDSuw5OrikmiPPEXDOg+GhYrtT4+3N0kA/kwj7r0b+jMIx9DSPxFpzkPSizSYpTVf4X6sfZmDhqlINvy/KzTPpjt4dAWPYfiVgGZw79l33dHcxv46Bog==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Paul Durrant <paul@xxxxxxx>, Kevin Tian <kevin.tian@xxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 27 Jan 2022 14:48:33 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

The actual function should always have lived in core x86 code; move it
there, replacing get_cache_line_size() by readily available (except very
early during boot; see the code comment) data. Also rename the function.

Drop the respective IOMMU hook, (re)introducing a respective boolean
instead. Replace a true and an almost open-coding instance of
iommu_sync_cache().

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Placing the function next to flush_area_local() exposes a curious
asymmetry between the SFENCE placements: sync_cache() has it after the
flush, while flush_area_local() has it before it. I think the latter one
is misplaced.
---
v2: Rename sync_cache() to cache_writeback().

--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -11,6 +11,7 @@
 #include <xen/sched.h>
 #include <xen/smp.h>
 #include <xen/softirq.h>
+#include <asm/cache.h>
 #include <asm/flushtlb.h>
 #include <asm/invpcid.h>
 #include <asm/nops.h>
@@ -265,6 +266,57 @@ unsigned int flush_area_local(const void
     return flags;
 }
 
+void cache_writeback(const void *addr, unsigned int size)
+{
+    /*
+     * This function may be called before current_cpu_data is established.
+     * Hence a fallback is needed to prevent the loop below becoming infinite.
+     */
+    unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
+    const void *end = addr + size;
+
+    addr -= (unsigned long)addr & (clflush_size - 1);
+    for ( ; addr < end; addr += clflush_size )
+    {
+/*
+ * The arguments to a macro must not include preprocessor directives. Doing so
+ * results in undefined behavior, so we have to create some defines here in
+ * order to avoid it.
+ */
+#if defined(HAVE_AS_CLWB)
+# define CLWB_ENCODING "clwb %[p]"
+#elif defined(HAVE_AS_XSAVEOPT)
+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
+#else
+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
+#endif
+
+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
+# define INPUT BASE_INPUT
+#else
+# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
+#endif
+        /*
+         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
+         * + prefix than a clflush + nop, and hence the prefix is added instead
+         * of letting the alternative framework fill the gap by appending nops.
+         */
+        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
+                         "data16 clflush %[p]", /* clflushopt */
+                         X86_FEATURE_CLFLUSHOPT,
+                         CLWB_ENCODING,
+                         X86_FEATURE_CLWB, /* no outputs */,
+                         INPUT(addr));
+#undef INPUT
+#undef BASE_INPUT
+#undef CLWB_ENCODING
+    }
+
+    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
+                      "sfence", X86_FEATURE_CLWB);
+}
+
 unsigned int guest_flush_tlb_flags(const struct domain *d)
 {
     bool shadow = paging_mode_shadow(d);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -240,54 +240,6 @@ domid_t did_to_domain_id(const struct vt
     return iommu->domid_map[did];
 }
 
-static void sync_cache(const void *addr, unsigned int size)
-{
-    static unsigned long clflush_size = 0;
-    const void *end = addr + size;
-
-    if ( clflush_size == 0 )
-        clflush_size = get_cache_line_size();
-
-    addr -= (unsigned long)addr & (clflush_size - 1);
-    for ( ; addr < end; addr += clflush_size )
-/*
- * The arguments to a macro must not include preprocessor directives. Doing so
- * results in undefined behavior, so we have to create some defines here in
- * order to avoid it.
- */
-#if defined(HAVE_AS_CLWB)
-# define CLWB_ENCODING "clwb %[p]"
-#elif defined(HAVE_AS_XSAVEOPT)
-# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
-#else
-# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
-#endif
-
-#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
-#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
-# define INPUT BASE_INPUT
-#else
-# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
-#endif
-        /*
-         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
-         * + prefix than a clflush + nop, and hence the prefix is added instead
-         * of letting the alternative framework fill the gap by appending nops.
-         */
-        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
-                         "data16 clflush %[p]", /* clflushopt */
-                         X86_FEATURE_CLFLUSHOPT,
-                         CLWB_ENCODING,
-                         X86_FEATURE_CLWB, /* no outputs */,
-                         INPUT(addr));
-#undef INPUT
-#undef BASE_INPUT
-#undef CLWB_ENCODING
-
-    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
-                      "sfence", X86_FEATURE_CLWB);
-}
-
 /* Allocate page table, return its machine address */
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
 {
@@ -306,8 +258,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
 
         clear_page(vaddr);
 
-        if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
-            sync_cache(vaddr, PAGE_SIZE);
+        iommu_sync_cache(vaddr, PAGE_SIZE);
         unmap_domain_page(vaddr);
         cur_pg++;
     }
@@ -1327,7 +1278,7 @@ int __init iommu_alloc(struct acpi_drhd_
     iommu->nr_pt_levels = agaw_to_level(agaw);
 
     if ( !ecap_coherent(iommu->ecap) )
-        vtd_ops.sync_cache = sync_cache;
+        iommu_non_coherent = true;
 
     nr_dom = cap_ndoms(iommu->cap);
 
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -28,6 +28,7 @@
 
 const struct iommu_init_ops *__initdata iommu_init_ops;
 struct iommu_ops __read_mostly iommu_ops;
+bool __read_mostly iommu_non_coherent;
 
 enum iommu_intremap __read_mostly iommu_intremap = iommu_intremap_full;
 
@@ -438,8 +439,7 @@ struct page_info *iommu_alloc_pgtable(st
     p = __map_domain_page(pg);
     clear_page(p);
 
-    if ( hd->platform_ops->sync_cache )
-        iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
+    iommu_sync_cache(p, PAGE_SIZE);
 
     unmap_domain_page(p);
 
--- a/xen/arch/x86/include/asm/cache.h
+++ b/xen/arch/x86/include/asm/cache.h
@@ -11,4 +11,10 @@
 
 #define __read_mostly __section(".data.read_mostly")
 
+#ifndef __ASSEMBLY__
+
+void cache_writeback(const void *addr, unsigned int size);
+
+#endif
+
 #endif
--- a/xen/arch/x86/include/asm/iommu.h
+++ b/xen/arch/x86/include/asm/iommu.h
@@ -19,6 +19,7 @@
 #include <xen/mem_access.h>
 #include <xen/spinlock.h>
 #include <asm/apicdef.h>
+#include <asm/cache.h>
 #include <asm/processor.h>
 #include <asm/hvm/vmx/vmcs.h>
 
@@ -134,12 +135,13 @@ extern bool untrusted_msi;
 int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
                    const uint8_t gvec);
 
-#define iommu_sync_cache(addr, size) ({                 \
-    const struct iommu_ops *ops = iommu_get_ops();      \
-                                                        \
-    if ( ops->sync_cache )                              \
-        iommu_vcall(ops, sync_cache, addr, size);       \
-})
+extern bool iommu_non_coherent;
+
+static inline void iommu_sync_cache(const void *addr, unsigned int size)
+{
+    if ( iommu_non_coherent )
+        cache_writeback(addr, size);
+}
 
 int __must_check iommu_free_pgtables(struct domain *d);
 struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -268,7 +268,6 @@ struct iommu_ops {
     int (*setup_hpet_msi)(struct msi_desc *);
 
     int (*adjust_irq_affinities)(void);
-    void (*sync_cache)(const void *addr, unsigned int size);
     void (*clear_root_pgtable)(struct domain *d);
     int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
 #endif /* CONFIG_X86 */
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -78,7 +78,6 @@ int __must_check qinval_device_iotlb_syn
                                           struct pci_dev *pdev,
                                           u16 did, u16 size, u64 addr);
 
-unsigned int get_cache_line_size(void);
 void flush_all_cache(void);
 
 uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *v
     unmap_domain_page(va);
 }
 
-unsigned int get_cache_line_size(void)
-{
-    return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
-}
-
 void flush_all_cache()
 {
     wbinvd();




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.