|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2 02/70] xen/sort: Switch to an extern inline implementation
On Mon, 14 Feb 2022, Julien Grall wrote:
> On 14/02/2022 12:50, Andrew Cooper wrote:
> > There are exactly 3 callers of sort() in the hypervisor. Callbacks in a
> > tight
> > loop like this are problematic for performance, especially with Spectre v2
> > protections, which is why extern inline is used commonly by libraries.
> >
> > Both ARM callers pass in NULL for the swap function, and while this might
> > seem
> > like an attractive option at first, it causes generic_swap() to be used,
> > which
> > forced a byte-wise copy. Provide real swap functions so the compiler can
> > optimise properly, which is very important for ARM downstreams where
> > milliseconds until the system is up matters.
>
> Did you actually benchmark it? Both those lists will have < 128 elements in
> them. So I would be extremely surprised if you save more than a few hundreds
> microseconds with this approach.
>
> So, my opinion on this approach hasn't changed. On v1, we discussed an
> approach that would suit both Stefano and I. Jan seemed to confirm that would
> also suit x86.
This patch series has become 70 patches and for the sake of helping
Andrew move forward in the quickest and most painless way possible, I
append the following using generic_swap as static inline.
Julien, Bertrand, is that acceptable to you?
Andrew, I know this is not your favorite approach but you have quite a
lot of changes to handle -- probably not worth focussing on one detail
which is pretty minor?
---
xen/sort: Switch to an extern inline implementation
There are exactly 3 callers of sort() in the hypervisor. Callbacks in a tight
loop like this are problematic for performance, especially with Spectre v2
protections, which is why extern inline is used commonly by libraries.
Make generic_swap() a static inline and used it at the two ARM call
sites.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
xen/arch/arm/bootfdt.c | 2 +-
xen/arch/arm/io.c | 2 +-
xen/include/xen/sort.h | 67 ++++++++++++++++++++++++++++++++++-
xen/lib/sort.c | 80 ++----------------------------------------
4 files changed, 70 insertions(+), 81 deletions(-)
diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
index afaa0e249b..0d62945d56 100644
--- a/xen/arch/arm/bootfdt.c
+++ b/xen/arch/arm/bootfdt.c
@@ -472,7 +472,7 @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr)
* the banks sorted in ascending order. So sort them through.
*/
sort(bootinfo.mem.bank, bootinfo.mem.nr_banks, sizeof(struct membank),
- cmp_memory_node, NULL);
+ cmp_memory_node, generic_swap);
early_print_info();
diff --git a/xen/arch/arm/io.c b/xen/arch/arm/io.c
index 729287e37c..1f35aaeea6 100644
--- a/xen/arch/arm/io.c
+++ b/xen/arch/arm/io.c
@@ -170,7 +170,7 @@ void register_mmio_handler(struct domain *d,
/* Sort mmio handlers in ascending order based on base address */
sort(vmmio->handlers, vmmio->num_entries, sizeof(struct mmio_handler),
- cmp_mmio_handler, NULL);
+ cmp_mmio_handler, generic_swap);
write_unlock(&vmmio->lock);
}
diff --git a/xen/include/xen/sort.h b/xen/include/xen/sort.h
index a403652948..f6065eda58 100644
--- a/xen/include/xen/sort.h
+++ b/xen/include/xen/sort.h
@@ -3,8 +3,73 @@
#include <xen/types.h>
+extern gnu_inline
+void generic_swap(void *a, void *b, size_t size)
+{
+ char t;
+
+ do {
+ t = *(char *)a;
+ *(char *)a++ = *(char *)b;
+ *(char *)b++ = t;
+ } while ( --size > 0 );
+}
+
+/*
+ * sort - sort an array of elements
+ * @base: pointer to data to sort
+ * @num: number of elements
+ * @size: size of each element
+ * @cmp: pointer to comparison function
+ * @swap: pointer to swap function or NULL
+ *
+ * This function does a heapsort on the given array. You may provide a
+ * swap function optimized to your element type.
+ *
+ * Sorting time is O(n log n) both on average and worst-case. While
+ * qsort is about 20% faster on average, it suffers from exploitable
+ * O(n*n) worst-case behavior and extra memory requirements that make
+ * it less suitable for kernel use.
+ */
+#ifndef SORT_IMPLEMENTATION
+extern gnu_inline
+#endif
void sort(void *base, size_t num, size_t size,
int (*cmp)(const void *, const void *),
- void (*swap)(void *, void *, size_t));
+ void (*swap)(void *, void *, size_t))
+{
+ /* pre-scale counters for performance */
+ size_t i = (num / 2) * size, n = num * size, c, r;
+
+ /* heapify */
+ while ( i > 0 )
+ {
+ for ( r = i -= size; r * 2 + size < n; r = c )
+ {
+ c = r * 2 + size;
+ if ( (c < n - size) && (cmp(base + c, base + c + size) < 0) )
+ c += size;
+ if ( cmp(base + r, base + c) >= 0 )
+ break;
+ swap(base + r, base + c, size);
+ }
+ }
+
+ /* sort */
+ for ( i = n; i > 0; )
+ {
+ i -= size;
+ swap(base, base + i, size);
+ for ( r = 0; r * 2 + size < i; r = c )
+ {
+ c = r * 2 + size;
+ if ( (c < i - size) && (cmp(base + c, base + c + size) < 0) )
+ c += size;
+ if ( cmp(base + r, base + c) >= 0 )
+ break;
+ swap(base + r, base + c, size);
+ }
+ }
+}
#endif /* __XEN_SORT_H__ */
diff --git a/xen/lib/sort.c b/xen/lib/sort.c
index 35ce0d7abd..b7e78cc0e8 100644
--- a/xen/lib/sort.c
+++ b/xen/lib/sort.c
@@ -4,81 +4,5 @@
* Jan 23 2005 Matt Mackall <mpm@xxxxxxxxxxx>
*/
-#include <xen/types.h>
-
-static void u32_swap(void *a, void *b, size_t size)
-{
- uint32_t t = *(uint32_t *)a;
-
- *(uint32_t *)a = *(uint32_t *)b;
- *(uint32_t *)b = t;
-}
-
-static void generic_swap(void *a, void *b, size_t size)
-{
- char t;
-
- do {
- t = *(char *)a;
- *(char *)a++ = *(char *)b;
- *(char *)b++ = t;
- } while ( --size > 0 );
-}
-
-/*
- * sort - sort an array of elements
- * @base: pointer to data to sort
- * @num: number of elements
- * @size: size of each element
- * @cmp: pointer to comparison function
- * @swap: pointer to swap function or NULL
- *
- * This function does a heapsort on the given array. You may provide a
- * swap function optimized to your element type.
- *
- * Sorting time is O(n log n) both on average and worst-case. While
- * qsort is about 20% faster on average, it suffers from exploitable
- * O(n*n) worst-case behavior and extra memory requirements that make
- * it less suitable for kernel use.
- */
-
-void sort(void *base, size_t num, size_t size,
- int (*cmp)(const void *, const void *),
- void (*swap)(void *, void *, size_t size))
-{
- /* pre-scale counters for performance */
- size_t i = (num / 2) * size, n = num * size, c, r;
-
- if ( !swap )
- swap = (size == 4 ? u32_swap : generic_swap);
-
- /* heapify */
- while ( i > 0 )
- {
- for ( r = i -= size; r * 2 + size < n; r = c )
- {
- c = r * 2 + size;
- if ( (c < n - size) && (cmp(base + c, base + c + size) < 0) )
- c += size;
- if ( cmp(base + r, base + c) >= 0 )
- break;
- swap(base + r, base + c, size);
- }
- }
-
- /* sort */
- for ( i = n; i > 0; )
- {
- i -= size;
- swap(base, base + i, size);
- for ( r = 0; r * 2 + size < i; r = c )
- {
- c = r * 2 + size;
- if ( (c < i - size) && (cmp(base + c, base + c + size) < 0) )
- c += size;
- if ( cmp(base + r, base + c) >= 0 )
- break;
- swap(base + r, base + c, size);
- }
- }
-}
+#define SORT_IMPLEMENTATION
+#include <xen/sort.h>
--
2.25.1
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |