[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] x86emul/test: encourage compiler to use more embedded broadcast


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 2 Jun 2022 12:28:49 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=h+1PVOQ+hil5P5y2+bKR8TqRq2XSQidRzzSytWExdRk=; b=BfJFkYzuZaLO3vfXnECYVzifeSfAG+cu3aV18RJr0/04r+N8ECEojiE9Pt6/DPHBMSlwl9rMVWdPJMfj0en88BvjOiB3jHMBOGALbCXXB9SItN4NIKr8RjfKKouR/1AYL+hVFWv/g6UxzUpHbPbje70Ft18uHFthYb1+A5FXG8ESQsqGu/QWfM9HZev8ILesCbEOu0mTio905eWs+4i/Nik1HeKgJLGWXWfTIDia0BivFJPH4yI0eimNr78Okd0ntCofzgbD4/pYsUwMnZKWEaebbXupjtUV/uVfjy+ichor5+gboFx5D1jxzvqFewItCoVr+bEJJ7XbOovr7m7Mmw==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=I8GZkWK9MB6p5lsHhqzSrDcyq4X8C0Rilt6M623W7Dz/xqMPqU8HuyYHOEUmodrrpXkq+x8FZEgd+HX3sLpdbB8RdSAJwm6zqyN2AUbjVgC98r4QyOpI0nkCdpgoR394NVKDPkShkZIYxhZcD7Uw63Ts+k5r39Xs0VO5wRIFFRgg5JuByxZkL5hXiZG5QjTDvYGctpZ8qEUOv3k5YXYThvXqYWiLoOV/KY9gbcKHgVEpoEb9XkLubOYVXbVNp3ldAjdc4xFX/a9FsedqWUxY1GCkuimVa0S8pG8LzuxK6RdOmhBfHz4wiSKnU29i5GZ/jp8ZDQg2Q6Tp285jwHQaPw==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 02 Jun 2022 10:28:57 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

For one, it was an oversight to leave dup_{hi,lo}() undefined for the 512-bit
vector size. For another, in FMA testing we can arrange for the compiler to
(hopefully) recognize broadcasting potential.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -912,6 +912,13 @@ static inline vec_t movlhps(vec_t x, vec
 })
 #  endif
 # endif
+#elif VEC_SIZE == 64
+# if FLOAT_SIZE == 4
+#  define dup_hi(x) B(movshdup, _mask, x, undef(), ~0)
+#  define dup_lo(x) B(movsldup, _mask, x, undef(), ~0)
+# elif FLOAT_SIZE == 8
+#  define dup_lo(x) B(movddup, _mask, x, undef(), ~0)
+# endif
 #endif
 #if VEC_SIZE == 16 && defined(__SSSE3__) && !defined(__AVX512VL__)
 # if INT_SIZE == 1
--- a/tools/tests/x86_emulator/simd-fma.c
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -63,6 +63,9 @@ int fma_test(void)
 {
     unsigned int i;
     vec_t x, y, z, src, inv, one;
+#ifdef __AVX512F__
+    typeof(one[0]) one_ = 1;
+#endif
 
     for ( i = 0; i < ELEM_COUNT; ++i )
     {
@@ -71,6 +74,10 @@ int fma_test(void)
         one[i] = 1;
     }
 
+#ifdef __AVX512F__
+# define one one_
+#endif
+
     x = (src + one) * inv;
     y = (src - one) * inv;
     touch(src);
@@ -93,22 +100,28 @@ int fma_test(void)
     x = src + inv;
     y = src - inv;
     touch(inv);
+    touch(one);
     z = src * one + inv;
     if ( !eq(x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one - inv;
     if ( !eq(-x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = src * one - inv;
     if ( !eq(y, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one + inv;
     if ( !eq(-y, z) ) return __LINE__;
     touch(inv);
 
+#undef one
+
 #if defined(addsub) && defined(fmaddsub)
     x = addsub(src * inv, one);
     y = addsub(src * inv, -one);




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.