This patch adds hugepage support to Xen for paravirtualized (PV) guests. The
patch is against the latest xen-unstable tree on xenbits.xensource.com.

Hugepage support must be enabled via the hypervisor command-line option
"allowhugepage". It is assumed that the guest guarantees that each hugepage
is physically aligned and contiguous. There is no support yet for
save/restore/migrate.
Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>
----
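(Notes below are for reviewers only and are not part of the patch.)

To turn the feature on, "allowhugepage" is appended to the xen.gz line in the
bootloader configuration; it is a boolean_param, so no value is required.

For context, here is a rough sketch of how a PV guest might install a 2MB
mapping once the option is enabled. It is illustrative only: the helper name
map_pv_hugepage is made up, the include paths assume a Linux-style PV guest,
and obtaining a machine-contiguous, superpage-aligned region starting at mfn
is left to the guest; the patch itself only validates and refcounts such an
L2 entry.

#include <linux/types.h>         /* uint64_t */
#include <xen/interface/xen.h>   /* struct mmu_update, DOMID_SELF */
#include <asm/xen/hypercall.h>   /* HYPERVISOR_mmu_update (Linux PV guest) */

static int map_pv_hugepage(unsigned long l2_mfn, unsigned int l2_idx,
                           unsigned long mfn, int writable)
{
    struct mmu_update req;
    int success;

    /* Machine address of the L2 slot being written; the low bits select
     * MMU_NORMAL_PT_UPDATE (== 0).  PAE/64-bit entries are 8 bytes. */
    req.ptr = ((uint64_t)l2_mfn << PAGE_SHIFT) + l2_idx * sizeof(uint64_t);

    /* Present superpage entry; _PAGE_RW determines whether the hypervisor
     * takes writable references on the frames it covers.  PAGE_SHIFT and
     * the _PAGE_* flags come from the guest's page-table headers. */
    req.val = ((uint64_t)mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_PSE |
              (writable ? _PAGE_RW : 0);

    return HYPERVISOR_mmu_update(&req, 1, &success, DOMID_SELF);
}

On such an update the hypervisor walks all L1_PAGETABLE_ENTRIES frames covered
by the entry and takes a data-page reference on each, which is why the region
must be machine-contiguous and superpage-aligned.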
--- xen-unstable//./xen/include/asm-x86/x86_32/page.h 2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h 2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
* Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
* Permit the NX bit if the hardware supports it.
*/
-#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h 2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h 2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t;
* Permit the NX bit if the hardware supports it.
* Note that range [62:52] is available for software use on x86/64.
*/
-#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/arch/x86/mm.c 2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/arch/x86/mm.c 2008-10-09 09:07:47.000000000 -0500
@@ -160,6 +160,9 @@ unsigned long total_pages;
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
+static int opt_allow_hugepage = 0;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
#define l1_disallow_mask(d) \
((d != dom_io) && \
(rangeset_is_empty((d)->iomem_caps) && \
@@ -584,6 +587,26 @@ static int get_page_and_type_from_pagenr
return rc;
}
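+/*
+ * Take a reference on a plain data page; writable mappings additionally
+ * take a PGT_writable_page type reference.
+ */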
+static int get_data_page(struct page_info *page, struct domain *d,
+                         int writeable)
+{
+    int rc;
+
+    if ( writeable )
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void put_data_page(struct page_info *page, int writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
@@ -656,6 +679,7 @@ get_page_from_l1e(
struct vcpu *curr = current;
struct domain *owner;
int okay;
+    int writeable;
if ( !(l1f & _PAGE_PRESENT) )
return 1;
@@ -698,10 +722,9 @@ get_page_from_l1e(
* contribute to writeable mapping refcounts. (This allows the
* qemu-dm helper process in dom0 to map the domain's memory without
* messing up the count of "real" writable mappings.) */
- okay = (((l1f & _PAGE_RW) &&
- !(unlikely(paging_mode_external(d) && (d != curr->domain))))
- ? get_page_and_type(page, d, PGT_writable_page)
- : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !( unlikely(paging_mode_external(d) && (d != curr->domain)) );
+    okay = get_data_page(page, d, writeable);
if ( !okay )
{
MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +782,43 @@ get_page_from_l2e(
MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
return -EINVAL;
}
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+
+        if ( !opt_allow_hugepage )
+            return -EINVAL;
+
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if ( unlikely(!rc) )
+            return -EINVAL;
+
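+        /* Take a matching reference on each remaining frame covered by the
+         * superpage (the first frame was acquired above). */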
+        for ( m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-2); m <= me; m++ )
+        {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if ( unlikely(!rc) )
+            {
+                for ( --m; m > mfn; --m )
+                    put_data_page(mfn_to_page(m), writeable);
+                put_data_page(page, writeable);
+                return -EINVAL;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn,
+                         L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+        rc = 0;
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
return rc;
}
@@ -955,7 +1010,18 @@ static int put_page_from_l2e(l2_pgentry_
if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
(l2e_get_pfn(l2e) != pfn) )
{
- put_page_and_type(l2e_get_page(l2e));
+        if ( l2e_get_flags(l2e) & _PAGE_PSE )
+        {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
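+            /* Drop the per-frame references taken in get_page_from_l2e(). */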
+            for ( m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-2); m <= me; m++ )
+                put_data_page(mfn_to_page(m), writeable);
+            put_data_page(page, writeable);
+        } else
+            put_page_and_type(l2e_get_page(l2e));
return 0;
}
return 1;