[Xen-devel] [PATCH RFC V3 07/12] xen: Introduce the Xen mapcache

From: Anthony PERARD <anthony.perard@xxxxxxxxxx>

The mapcache maps chunks of guest memory on demand and unmaps them
when they are no longer needed.

Each call to qemu_get_ram_ptr calls qemu_map_cache with the lock
option, so the mapcache will not unmap those pointers while they are in use.

Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---
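(Reviewer note, not part of the commit message: the sketch below is a minimal,
standalone illustration of the bucket arithmetic used at the top of
qemu_map_cache(), assuming the x86_64 constants from xen_mapcache.h; the guest
physical address is made up.)

#include <stdio.h>
#include <stdint.h>

/* Constants as defined for x86_64 in xen_mapcache.h (1MB buckets). */
#define MCACHE_BUCKET_SHIFT 20
#define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)

int main(void)
{
    uint64_t phys_addr = 0x12345678ULL; /* made-up guest physical address */

    /* Same split as in qemu_map_cache(): which bucket the address falls
     * into, and the offset inside that bucket. */
    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    printf("bucket index 0x%lx, offset 0x%lx\n",
           address_index, address_offset);
    return 0;
}

In the patch, qemu_map_cache() maps the whole bucket with
xc_map_foreign_bulk() and returns vaddr_base plus this offset; locked
mappings are additionally recorded in locked_entries so that
qemu_invalidate_entry() can find them later.
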
 Makefile.target |    2 +-
 exec.c          |   36 ++++++-
 hw/xen.h        |    4 +
 xen-all.c       |   63 ++++++++++++
 xen-stub.c      |    4 +
 xen_mapcache.c  |  302 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 xen_mapcache.h  |   26 +++++
 7 files changed, 432 insertions(+), 5 deletions(-)
 create mode 100644 xen_mapcache.c
 create mode 100644 xen_mapcache.h

diff --git a/Makefile.target b/Makefile.target
index 6b390e6..ea14393 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -183,7 +183,7 @@ QEMU_CFLAGS += $(VNC_PNG_CFLAGS)
 
 # xen backend driver support
 obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o
-obj-$(CONFIG_XEN) += xen-all.o
+obj-$(CONFIG_XEN) += xen-all.o xen_mapcache.o
 obj-$(CONFIG_NO_XEN) += xen-stub.o
 
 # xen full virtualized machine
diff --git a/exec.c b/exec.c
index 380dab5..f5888eb 100644
--- a/exec.c
+++ b/exec.c
@@ -60,6 +60,9 @@
 #endif
 #endif
 
+#include "hw/xen.h"
+#include "xen_mapcache.h"
+
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
 //#define DEBUG_TLB
@@ -2833,6 +2836,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
         }
     }
 
+    new_block->offset = find_ram_offset(size);
     if (host) {
         new_block->host = host;
     } else {
@@ -2856,15 +2860,17 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
                                    PROT_EXEC|PROT_READ|PROT_WRITE,
                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 #else
-            new_block->host = qemu_vmalloc(size);
+            if (xen_enabled()) {
+                xen_ram_alloc(new_block->offset, size);
+            } else {
+                new_block->host = qemu_vmalloc(size);
+            }
 #endif
 #ifdef MADV_MERGEABLE
             madvise(new_block->host, size, MADV_MERGEABLE);
 #endif
         }
     }
-
-    new_block->offset = find_ram_offset(size);
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
@@ -2905,7 +2911,11 @@ void qemu_ram_free(ram_addr_t addr)
 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
                 munmap(block->host, block->length);
 #else
-                qemu_vfree(block->host);
+                if (xen_enabled()) {
+                    qemu_invalidate_entry(block->host);
+                } else {
+                    qemu_vfree(block->host);
+                }
 #endif
             }
             qemu_free(block);
@@ -2931,6 +2941,14 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
         if (addr - block->offset < block->length) {
             QLIST_REMOVE(block, next);
             QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+            if (xen_enabled()) {
+                /* We need to check if the requested address is in the RAM
+                 * because we don't want to map the entire memory in QEMU.
+                 */
+                if (block->offset == 0)
+                    return qemu_map_cache(addr, 0, 1);
+                block->host = qemu_map_cache(block->offset, block->length, 1);
+            }
             return block->host + (addr - block->offset);
         }
     }
@@ -2949,11 +2967,18 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr)
     uint8_t *host = ptr;
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
+        /* This can happen when the block is not mapped. */
+        if (block->host == NULL)
+            continue;
         if (host - block->host < block->length) {
             return block->offset + (host - block->host);
         }
     }
 
+    if (xen_enabled()) {
+        return qemu_ram_addr_from_mapcache(ptr);
+    }
+
     fprintf(stderr, "Bad ram pointer %p\n", ptr);
     abort();
 
@@ -3728,6 +3753,9 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
     if (is_write) {
         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
     }
+    if (xen_enabled()) {
+        qemu_invalidate_entry(buffer);
+    }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
     cpu_notify_map_clients();
diff --git a/hw/xen.h b/hw/xen.h
index c5189b1..2b62ff5 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -34,4 +34,8 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
 
 int xen_init(int smp_cpus);
 
+#ifdef NEED_CPU_H
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
+#endif
+
 #endif /* QEMU_HW_XEN_H */
diff --git a/xen-all.c b/xen-all.c
index 765f87a..4e0b061 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -12,6 +12,8 @@
 #include "hw/xen_common.h"
 #include "hw/xen_backend.h"
 
+#include "xen_mapcache.h"
+
 /* Xen specific function for piix pci */
 
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
@@ -52,6 +54,63 @@ qemu_irq *i8259_xen_init(void)
     return qemu_allocate_irqs(i8259_set_irq, NULL, 16);
 }
 
+
+/* Memory Ops */
+
+static void xen_ram_init(ram_addr_t ram_size)
+{
+    RAMBlock *new_block;
+    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
+
+    new_block = qemu_mallocz(sizeof (*new_block));
+    pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram");
+    new_block->host = NULL;
+    new_block->offset = 0;
+    new_block->length = ram_size;
+
+    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+
+    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+                                       new_block->length >> TARGET_PAGE_BITS);
+    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
+           0xff, new_block->length >> TARGET_PAGE_BITS);
+
+    if (ram_size >= 0xe0000000 ) {
+        above_4g_mem_size = ram_size - 0xe0000000;
+        below_4g_mem_size = 0xe0000000;
+    } else {
+        below_4g_mem_size = ram_size;
+    }
+
+    cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset);
+#if TARGET_PHYS_ADDR_BITS > 32
+    if (above_4g_mem_size > 0) {
+        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
+                                     new_block->offset + below_4g_mem_size);
+    }
+#endif
+}
+
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
+{
+    unsigned long nr_pfn;
+    xen_pfn_t *pfn_list;
+    int i;
+
+    nr_pfn = size >> TARGET_PAGE_BITS;
+    pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn);
+
+    for (i = 0; i < nr_pfn; i++)
+        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
+
+    if (xc_domain_memory_populate_physmap(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
+        hw_error("xen: failed to populate ram at %lx", ram_addr);
+    }
+
+    qemu_free(pfn_list);
+}
+
+
 /* Initialise Xen */
 
 int xen_init(int smp_cpus)
@@ -62,5 +121,9 @@ int xen_init(int smp_cpus)
         return -1;
     }
 
+    /* Init RAM management */
+    qemu_map_cache_init();
+    xen_ram_init(ram_size);
+
     return 0;
 }
diff --git a/xen-stub.c b/xen-stub.c
index 07e64bc..c9f477d 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -24,6 +24,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
 {
 }
 
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
+{
+}
+
 int xen_init(int smp_cpus)
 {
     return -ENOSYS;
diff --git a/xen_mapcache.c b/xen_mapcache.c
new file mode 100644
index 0000000..8e3bf6c
--- /dev/null
+++ b/xen_mapcache.c
@@ -0,0 +1,302 @@
+#include "config.h"
+
+#include "hw/xen_backend.h"
+#include "blockdev.h"
+
+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#include "xen_mapcache.h"
+
+
+//#define MAPCACHE_DEBUG
+
+#ifdef MAPCACHE_DEBUG
+#define DPRINTF(fmt, ...) do { \
+    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
+} while (0)
+#else
+#define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+#if defined(MAPCACHE)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+typedef struct MapCacheEntry {
+    unsigned long paddr_index;
+    uint8_t *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>XC_PAGE_SHIFT);
+    uint8_t lock;
+    struct MapCacheEntry *next;
+} MapCacheEntry;
+
+typedef struct MapCacheRev {
+    uint8_t *vaddr_req;
+    unsigned long paddr_index;
+    QTAILQ_ENTRY(MapCacheRev) next;
+} MapCacheRev;
+
+typedef struct MapCache {
+    MapCacheEntry *entry;
+    unsigned long nr_buckets;
+    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
+
+    /* For most cases (>99.9%), the page address is the same. */
+    unsigned long last_address_index;
+    uint8_t      *last_address_vaddr;
+} MapCache;
+
+static MapCache *mapcache;
+
+
+int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    mapcache = qemu_mallocz(sizeof (MapCache));
+
+    QTAILQ_INIT(&mapcache->locked_entries);
+    mapcache->last_address_index = ~0UL;
+
+    mapcache->nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = mapcache->nr_buckets * sizeof(MapCacheEntry);
+    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+    DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", 
mapcache->nr_buckets, size);
+    mapcache->entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANON, -1, 0);
+    if (mapcache->entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(MapCacheEntry *entry,
+                              target_phys_addr_t size,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t *pfns;
+    int *err;
+    unsigned int i, j;
+
+    pfns = qemu_mallocz((size >> XC_PAGE_SHIFT) * sizeof (xen_pfn_t));
+    err = qemu_mallocz((size >> XC_PAGE_SHIFT) * sizeof (int));
+
+    if (entry->vaddr_base != NULL) {
+        errno = munmap(entry->vaddr_base, size);
+        if (errno) {
+            fprintf(stderr, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < size >> XC_PAGE_SHIFT; i++) {
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err,
+                                     size >> XC_PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(stderr, "xc_map_foreign_bulk error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base  = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < size >> XC_PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (size >> XC_PAGE_SHIFT)) ?
+            (size >> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0) {
+            word = (word << 1) | !err[i + --j];
+        }
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+
+    qemu_free(pfns);
+    qemu_free(err);
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    MapCacheEntry *entry, *pentry = NULL;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    if (address_index == mapcache->last_address_index && !lock)
+        return mapcache->last_address_vaddr + address_offset;
+
+    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+
+    while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        entry = qemu_mallocz(sizeof(MapCacheEntry));
+        pentry->next = entry;
+        qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+    } else if (!entry->lock) {
+        if (!entry->vaddr_base || entry->paddr_index != address_index || !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
+            qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+    }
+
+    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) {
+        mapcache->last_address_index = ~0UL;
+        return NULL;
+    }
+
+    mapcache->last_address_index = address_index;
+    mapcache->last_address_vaddr = entry->vaddr_base;
+    if (lock) {
+        MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev));
+        entry->lock++;
+        reventry->vaddr_req = mapcache->last_address_vaddr + address_offset;
+        reventry->paddr_index = mapcache->last_address_index;
+        QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next);
+    }
+
+    return mapcache->last_address_vaddr + address_offset;
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+    MapCacheRev *reventry;
+    unsigned long paddr_index;
+    int found = 0;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == ptr) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", 
ptr);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   %lx -> %p is present\n", reventry->paddr_index, 
reventry->vaddr_req);
+        }
+        abort();
+        return 0;
+    }
+
+    return paddr_index << MCACHE_BUCKET_SHIFT;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+    MapCacheEntry *entry = NULL, *pentry = NULL;
+    MapCacheRev *reventry;
+    unsigned long paddr_index;
+    int found = 0;
+
+    if (mapcache->last_address_vaddr == buffer)
+        mapcache->last_address_index =  ~0UL;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   %lx -> %p is present\n", reventry->paddr_index, 
reventry->vaddr_req);
+        }
+        return;
+    }
+    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", 
buffer);
+        return;
+    }
+    entry->lock--;
+    if (entry->lock > 0 || pentry == NULL)
+        return;
+
+    pentry->next = entry->next;
+    errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+    if (errno) {
+        fprintf(stderr, "unmap fails %d\n", errno);
+        exit(-1);
+    }
+    qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+    MapCacheRev *reventry;
+
+    qemu_aio_flush();
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        DPRINTF("There should be no locked mappings at this time, but %lx -> 
%p is present\n", reventry->paddr_index, reventry->vaddr_req);
+    }
+
+    mapcache_lock();
+
+    for (i = 0; i < mapcache->nr_buckets; i++) {
+        MapCacheEntry *entry = &mapcache->entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(stderr, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base  = NULL;
+    }
+
+    mapcache->last_address_index =  ~0UL;
+    mapcache->last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+#else
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    return qemu_get_ram_ptr(phys_addr);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+}
+#endif /* !MAPCACHE */
diff --git a/xen_mapcache.h b/xen_mapcache.h
new file mode 100644
index 0000000..5a6730f
--- /dev/null
+++ b/xen_mapcache.h
@@ -0,0 +1,26 @@
+#ifndef XEN_MAPCACHE_H
+#define XEN_MAPCACHE_H
+
+#if (defined(__i386__) || defined(__x86_64__))
+#  define MAPCACHE
+#  if defined(__i386__)
+#    define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#    define MCACHE_BUCKET_SHIFT 16
+#  elif defined(__x86_64__)
+#    define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#    define MCACHE_BUCKET_SHIFT 20
+#  endif
+#  define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+#endif
+
+int      qemu_map_cache_init(void);
+uint8_t  *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock);
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr);
+void     qemu_invalidate_entry(uint8_t *buffer);
+void     qemu_invalidate_map_cache(void);
+
+#define mapcache_lock()   ((void)0)
+#define mapcache_unlock() ((void)0)
+
+
+#endif /* !XEN_MAPCACHE_H */
-- 
1.6.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
