# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1200673213 0
# Node ID 420f98121491fdc277a9646682e2c2a77e716ac5
# Parent ec10c9a2d97679c04e384b28bc92c71a574cdd79
minios: support COW for a zero page
This makes it possible to support sparse data.
Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>
---
extras/mini-os/arch/x86/mm.c | 6 +++-
extras/mini-os/arch/x86/traps.c | 44 ++++++++++++++++++++++++++++++++++
extras/mini-os/include/ia64/arch_mm.h | 2 +
extras/mini-os/include/types.h | 7 +++++
extras/mini-os/include/x86/arch_mm.h | 4 +++
extras/mini-os/include/x86/traps.h | 4 +++
6 files changed, 66 insertions(+), 1 deletion(-)
diff -r ec10c9a2d976 -r 420f98121491 extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c Fri Jan 18 16:09:05 2008 +0000
+++ b/extras/mini-os/arch/x86/mm.c Fri Jan 18 16:20:13 2008 +0000
@@ -50,6 +50,7 @@
#endif
unsigned long *phys_to_machine_mapping;
+unsigned long mfn_zero;
extern char stack[];
extern void page_walk(unsigned long virt_addr);
@@ -492,10 +493,13 @@ static void clear_bootstrap(void)
static void clear_bootstrap(void)
{
struct xen_memory_reservation reservation;
- xen_pfn_t mfns[] = { virt_to_mfn(0), virt_to_mfn(&shared_info) };
+ xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
int n = sizeof(mfns)/sizeof(*mfns);
pte_t nullpte = { };
+ /* Use page 0 as the CoW zero page */
+ memset(NULL, 0, PAGE_SIZE);
+ mfn_zero = pfn_to_mfn(0);
if (HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG))
printk("Unable to unmap page 0\n");
diff -r ec10c9a2d976 -r 420f98121491 extras/mini-os/arch/x86/traps.c
--- a/extras/mini-os/arch/x86/traps.c Fri Jan 18 16:09:05 2008 +0000
+++ b/extras/mini-os/arch/x86/traps.c Fri Jan 18 16:20:13 2008 +0000
@@ -118,6 +118,46 @@ void page_walk(unsigned long virt_addres
}
+/*
+ * Try to resolve a write fault at addr as copy-on-write of the zero page.
+ *
+ * Walks the page tables from start_info.pt_base down to the L1 entry that
+ * covers addr. If that entry is present and refers to mfn_zero, a freshly
+ * allocated, zero-filled page is mapped over the faulting page with L1_PROT.
+ *
+ * Returns 1 when the mapping was replaced (the caller can simply return from
+ * the fault). Returns 0 when addr is not a present zero-page mapping, when no
+ * page could be allocated, or when the update_va_mapping hypercall failed.
+ */
+static int handle_cow(unsigned long addr) {
+    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
+    unsigned long new_page;
+    int rc;
+
+#if defined(__x86_64__)
+    page = tab[l4_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    tab = pte_to_virt(page);
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
+    page = tab[l3_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    tab = pte_to_virt(page);
+#endif
+    page = tab[l2_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    tab = pte_to_virt(page);
+
+    page = tab[l1_table_offset(addr)];
+    if (!(page & _PAGE_PRESENT))
+        return 0;
+    /* Only support CoW for the zero page. */
+    if (PHYS_PFN(page) != mfn_zero)
+        return 0;
+
+    new_page = alloc_pages(0);
+    if (!new_page) {
+        /*
+         * Assumes alloc_pages() reports failure by returning 0. Without this
+         * check the memset below would write through address 0 while we are
+         * still inside the page-fault handler.
+         */
+        printk("Unable to allocate a page for CoW at %lx.\n", addr);
+        return 0;
+    }
+    memset((void*) new_page, 0, PAGE_SIZE);
+
+    /* Replace the read-only zero-page mapping with the private writable copy. */
+    rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, __pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG);
+    if (!rc)
+        return 1;
+
+    /* NOTE(review): new_page is not released on this path - confirm whether it should be freed. */
+    printk("Map zero page to %lx failed: %d.\n", addr, rc);
+    return 0;
+}
+
#define read_cr2() \
(HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
@@ -126,6 +166,10 @@ void do_page_fault(struct pt_regs *regs,
void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
unsigned long addr = read_cr2();
+
+ if ((error_code & TRAP_PF_WRITE) && handle_cow(addr))
+ return;
+
/* If we are already handling a page fault, and got another one
that means we faulted in pagetable walk. Continuing here would cause
a recursive fault */
diff -r ec10c9a2d976 -r 420f98121491 extras/mini-os/include/ia64/arch_mm.h
--- a/extras/mini-os/include/ia64/arch_mm.h Fri Jan 18 16:09:05 2008 +0000
+++ b/extras/mini-os/include/ia64/arch_mm.h Fri Jan 18 16:20:13 2008 +0000
@@ -37,5 +37,7 @@
#define STACK_SIZE (PAGE_SIZE * (1 << STACK_SIZE_PAGE_ORDER))
#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
+/* TODO */
+#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
#endif /* __ARCH_MM_H__ */
diff -r ec10c9a2d976 -r 420f98121491 extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h Fri Jan 18 16:09:05 2008 +0000
+++ b/extras/mini-os/include/types.h Fri Jan 18 16:20:13 2008 +0000
@@ -57,6 +57,13 @@ typedef struct { unsigned long pte; } pt
typedef struct { unsigned long pte; } pte_t;
#endif /* __i386__ || __x86_64__ */
+#if !defined(CONFIG_X86_PAE) /* __pte(x): build a pte_t from one value; x is the only initializer */
+#define __pte(x) ((pte_t) { (x) } )
+#else /* PAE: x is widened to unsigned long long, then split into its low part and the part above bit 32 */
+#define __pte(x) ({ unsigned long long _x = (x); \
+ ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+#endif
+
typedef u8 uint8_t;
typedef s8 int8_t;
typedef u16 uint16_t;
diff -r ec10c9a2d976 -r 420f98121491 extras/mini-os/include/x86/arch_mm.h
--- a/extras/mini-os/include/x86/arch_mm.h Fri Jan 18 16:09:05 2008 +0000
+++ b/extras/mini-os/include/x86/arch_mm.h Fri Jan 18 16:20:13 2008 +0000
@@ -144,12 +144,14 @@ typedef unsigned long pgentry_t;
#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
#if defined(CONFIG_X86_PAE)
#define L3_PROT (_PAGE_PRESENT)
#endif /* CONFIG_X86_PAE */
#elif defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
@@ -190,6 +192,7 @@ typedef unsigned long maddr_t;
extern unsigned long *phys_to_machine_mapping;
extern char _text, _etext, _erodata, _edata, _end;
+extern unsigned long mfn_zero;
#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
static __inline__ maddr_t phys_to_machine(paddr_t phys)
{
@@ -224,5 +227,6 @@ static __inline__ paddr_t machine_to_phy
#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT)
#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
+#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO)
#endif /* _ARCH_MM_H_ */
diff -r ec10c9a2d976 -r 420f98121491 extras/mini-os/include/x86/traps.h
--- a/extras/mini-os/include/x86/traps.h Fri Jan 18 16:09:05 2008 +0000
+++ b/extras/mini-os/include/x86/traps.h Fri Jan 18 16:20:13 2008 +0000
@@ -70,4 +70,8 @@ struct pt_regs {
void dump_regs(struct pt_regs *regs);
+#define TRAP_PF_PROT 0x1 /* NOTE(review): presumably the protection-violation bit of the x86 page-fault error code - confirm */
+#define TRAP_PF_WRITE 0x2 /* tested against error_code in do_page_fault before handle_cow() is tried */
+#define TRAP_PF_USER 0x4 /* NOTE(review): presumably the user-mode bit of the x86 page-fault error code - confirm */
+
#endif /* _TRAPS_H_ */
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|