WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/

[Xen-changelog] [IA64] page ref counter

# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID c644eb4049abedac4bbe83c0497b592b1f4bb531
# Parent  405f0f847c0f33381c69a04a168f34150a2bca7d
[IA64] page ref counter

Signed-off-by: Akio Takebe <takebe_akio@xxxxxxxxxxxxxx>
Signed-off-by: Masaki Kanno <kanno.masaki@xxxxxxxxxxxxxx>
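
This changeset replaces the stubbed-out type-count macros on IA64 with real get_page_type()/put_page_type() implementations ported from arch/x86/mm.c. A minimal sketch of how callers are expected to pair the two primitives (the example function and its name are illustrative, not part of the patch):

    static int use_frame_as_writable(struct page_info *page)
    {
        /* Take a type reference; returns 0 if the frame is already
         * typed incompatibly or the type count would overflow. */
        if ( !get_page_type(page, PGT_writable_page) )
            return 0;

        /* ... the frame may now be treated as a writable page ... */

        /* Drop the type reference; when the type count falls to zero
         * the page is unvalidated via free_page_type(). */
        put_page_type(page);
        return 1;
    }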

diff -r 405f0f847c0f -r c644eb4049ab xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Thu Mar  9 23:24:31 2006
+++ b/xen/arch/ia64/xen/xenmisc.c       Fri Mar 10 15:23:39 2006
@@ -147,12 +147,17 @@
     //memset(percpu_info, 0, sizeof(percpu_info));
 }
 
-#if 0
-void free_page_type(struct page_info *page, unsigned int type)
-{
-       dummy();
-}
-#endif
+void free_page_type(struct page_info *page, u32 type)
+{
+//     dummy();
+       return;
+}
+
+int alloc_page_type(struct page_info *page, u32 type)
+{
+//     dummy();
+       return 1;
+}
 
 ///////////////////////////////
 //// misc memory stuff
@@ -415,3 +420,203 @@
        }
        else printk("sync_split_caches ignored for CPU with no split cache\n");
 }
+
+///////////////////////////////
+// from arch/x86/mm.c
+///////////////////////////////
+
+#ifdef VERBOSE
+#define MEM_LOG(_f, _a...)                           \
+  printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+         current->domain->domain_id , __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+void cleanup_writable_pagetable(struct domain *d)
+{
+  return;
+}
+
+void put_page_type(struct page_info *page)
+{
+    u32 nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x - 1;
+
+        ASSERT((x & PGT_count_mask) != 0);
+
+        /*
+         * The page should always be validated while a reference is held. The 
+         * exception is during domain destruction, when we forcibly invalidate 
+         * page-table pages if we detect a referential loop.
+         * See domain.c:relinquish_list().
+         */
+        ASSERT((x & PGT_validated) || 
+               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
+
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            /* Record TLB information for flush later. Races are harmless. */
+            page->tlbflush_timestamp = tlbflush_current_time();
+            
+            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
+                 likely(nx & PGT_validated) )
+            {
+                /*
+                 * Page-table pages must be unvalidated when count is zero. The
+                 * 'free' is safe because the refcnt is non-zero and validated
+                 * bit is clear => other ops will spin or fail.
+                 */
+                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 
+                                           x & ~PGT_validated)) != x) )
+                    goto again;
+                /* We cleared the 'valid bit' so we do the clean up. */
+                free_page_type(page, x);
+                /* Carry on, but with the 'valid bit' now clear. */
+                x  &= ~PGT_validated;
+                nx &= ~PGT_validated;
+            }
+        }
+        else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == 
+                            (PGT_pinned | 1)) &&
+                           ((nx & PGT_type_mask) != PGT_writable_page)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+}
+
+
+int get_page_type(struct page_info *page, u32 type)
+{
+    u32 nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x + 1;
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
+            return 0;
+        }
+        else if ( unlikely((x & PGT_count_mask) == 0) )
+        {
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+            {
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+                {
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
+                }
+
+                /* We lose existing type, back pointer, and validity. */
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+                nx |= type;
+
+                /* No special validation needed for writable pages. */
+                /* Page tables and GDT/LDT need to be scanned for validity. */
+                if ( type == PGT_writable_page )
+                    nx |= PGT_validated;
+            }
+        }
+        else
+        {
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    if ( current->domain == page_get_owner(page) )
+                    {
+                        /*
+                         * This ensures functions like set_gdt() see up-to-date
+                         * type info without needing to clean up writable p.t.
+                         * state on the fast path.
+                         */
+                        LOCK_BIGLOCK(current->domain);
+                        cleanup_writable_pagetable(current->domain);
+                        y = page->u.inuse.type_info;
+                        UNLOCK_BIGLOCK(current->domain);
+                        /* Can we make progress now? */
+                        if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
+                             ((y & PGT_count_mask) == 0) )
+                            goto again;
+                    }
+                    if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+                         ((type & PGT_type_mask) != PGT_l1_page_table) )
+                        MEM_LOG("Bad type (saw %" PRtype_info
+                                " != exp %" PRtype_info ") "
+                                "for mfn %lx (pfn %lx)",
+                                x, type, page_to_mfn(page),
+                                get_gpfn_from_mfn(page_to_mfn(page)));
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
+                          ((type & PGT_va_mask) != (x & PGT_va_mask)) )
+                {
+#ifdef CONFIG_X86_PAE
+                    /* We use backptr as extra typing. Cannot be unknown. */
+                    if ( (type & PGT_type_mask) == PGT_l2_page_table )
+                        return 0;
+#endif
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+    if ( unlikely(!(nx & PGT_validated)) )
+    {
+        /* Try to validate page type; drop the new reference on failure. */
+        if ( unlikely(!alloc_page_type(page, type)) )
+        {
+            MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
+                    PRtype_info ": caf=%08x taf=%" PRtype_info,
+                    page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
+                    type, page->count_info, page->u.inuse.type_info);
+            /* No one else can get a reference. We hold the only ref. */
+            page->u.inuse.type_info = 0;
+            return 0;
+        }
+
+        /* No one else is updating simultaneously. */
+        __set_bit(_PGT_validated, &page->u.inuse.type_info);
+    }
+
+    return 1;
+}
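
Both functions above are built on the same lock-free update skeleton: snapshot type_info, compute the new value, and let cmpxchg() detect concurrent modification. A stripped-down sketch of that loop, with compute_new() standing in (hypothetically) for the per-function count and flag logic:

    u32 nx, x, y = page->u.inuse.type_info;

    do {
        x  = y;               /* work against a stable snapshot */
        nx = compute_new(x);  /* hypothetical: adjust count/flags */
        /* cmpxchg() returns the value it found; a mismatch means
         * another CPU changed type_info, so recompute and retry. */
    } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
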
diff -r 405f0f847c0f -r c644eb4049ab xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Thu Mar  9 23:24:31 2006
+++ b/xen/include/asm-ia64/mm.h Fri Mar 10 15:23:39 2006
@@ -41,32 +41,33 @@
     /* Each frame can be threaded onto a doubly-linked list. */
     struct list_head list;
 
+    /* Reference count and various PGC_xxx flags and fields. */
+    u32 count_info;
+
+    /* Context-dependent fields follow... */
+    union {
+
+        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
+        struct {
+            /* Owner of this page (NULL if page is anonymous). */
+            u32 _domain; /* pickled format */
+            /* Type reference count and various PGT_xxx flags and fields. */
+            unsigned long type_info;
+        } __attribute__ ((packed)) inuse;
+
+        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
+        struct {
+            /* Order-size of the free chunk this page is the head of. */
+            u32 order;
+            /* Mask of possibly-tainted TLBs. */
+            cpumask_t cpumask;
+        } __attribute__ ((packed)) free;
+
+    } u;
+
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     u32 tlbflush_timestamp;
 
-    /* Reference count and various PGC_xxx flags and fields. */
-    u32 count_info;
-
-    /* Context-dependent fields follow... */
-    union {
-
-        /* Page is in use by a domain. */
-        struct {
-            /* Owner of this page. */
-            u32        _domain;
-            /* Type reference count and various PGT_xxx flags and fields. */
-            u32 type_info;
-        } inuse;
-
-        /* Page is on a free list. */
-        struct {
-            /* Mask of possibly-tainted TLBs. */
-            cpumask_t cpumask;
-            /* Order-size of the free chunk this page is the head of. */
-            u8 order;
-        } free;
-
-    } u;
 #if 0
 // following added for Linux compiling
     page_flags_t flags;
@@ -94,8 +95,15 @@
 #define _PGT_pinned         27
 #define PGT_pinned          (1U<<_PGT_pinned)
 
-/* 27-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<27)-1)
+/* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        16
+#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
+/* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
+/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
+/* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<16)-1)
 
 /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
@@ -138,7 +146,6 @@
 
 static inline void put_page(struct page_info *page)
 {
-#ifdef VALIDATE_VT     // doesn't work with non-VTI in grant tables yet
     u32 nx, x, y = page->count_info;
 
     do {
@@ -149,14 +156,12 @@
 
     if (unlikely((nx & PGC_count_mask) == 0))
        free_domheap_page(page);
-#endif
 }
 
 /* count_info and ownership are checked atomically. */
 static inline int get_page(struct page_info *page,
                            struct domain *domain)
 {
-#ifdef VALIDATE_VT
     u64 x, nx, y = *((u64*)&page->count_info);
     u32 _domain = pickle_domptr(domain);
 
@@ -172,14 +177,13 @@
            return 0;
        }
     }
-    while(unlikely(y = cmpxchg(&page->count_info, x, nx)) != x);
-#endif
+    while(unlikely((y = cmpxchg((u64*)&page->count_info, x, nx)) != x));
     return 1;
 }
 
-/* No type info now */
-#define put_page_type(page)
-#define get_page_type(page, type) 1
+extern void put_page_type(struct page_info *page);
+extern int get_page_type(struct page_info *page, u32 type);
+
 static inline void put_page_and_type(struct page_info *page)
 {
     put_page_type(page);
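
For reference, the type_info word under the new asm-ia64/mm.h layout decomposes as sketched below. Only the constants visible in the hunk above are certain; the type and validated bits sit outside the diff context:

    /*
     *  31..28  type/validation flags (definitions not shown in the hunk)
     *  27      PGT_pinned      frame is pinned by its owner
     *  26..16  PGT_va_mask     11-bit va back pointer:
     *                            all ones     => PGT_va_mutable
     *                            all ones - 1 => PGT_va_unknown
     *  15..0   PGT_count_mask  16-bit type reference count
     */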

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
