# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1179127409 -32400
# Node ID d9255a174eef04c9d4aa3346b61871689735c5d0
# Parent  22101e2f73b60b71efb0ab2736dd9d1b0aa60260
Allow conflicting RID allocation so that context switching works with more
than 64 domains.
The region register value is treated as unsigned long, so the related
constants should be unsigned long as well.
Remove the unused constant XEN_DEFAULT_RID.
Move the RID-related constants to regionreg.h.
Clean up vmMangleRID: replace uint with ulong.
PATCHNAME: allow_conflicted_rid_allocation

Signed-off-by: Isaku Yamahata

diff -r 22101e2f73b6 -r d9255a174eef xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c    Mon May 14 11:33:22 2007 +0900
+++ b/xen/arch/ia64/xen/domain.c    Mon May 14 16:23:29 2007 +0900
@@ -84,15 +84,54 @@ ia64_disable_vhpt_walker(void)
     ia64_set_pta(VHPT_SIZE_LOG2 << 2);
 }
 
+struct last_domain_entry {
+    struct domain *last_domain;
+    u32 tlbflush_timestamp;
+};
+typedef struct last_domain_entry last_domain_t[IA64_MAX_RID_BLOCKS];
+DEFINE_PER_CPU(last_domain_t, last_domain);
+
+void
+init_context_switch(void)
+{
+    int cpu;
+    for_each_cpu(cpu) {
+        unsigned int i;
+        struct last_domain_entry *last_domain_cpu =
+            per_cpu(last_domain, cpu);
+        for (i = 0; i < IA64_MAX_RID_BLOCKS; i++) {
+            last_domain_cpu[i].last_domain = NULL;
+            last_domain_cpu[i].tlbflush_timestamp =
+                tlbflush_current_time();
+        }
+    }
+
+}
+
 static void flush_vtlb_for_context_switch(struct vcpu* prev, struct vcpu* next)
 {
     int cpu = smp_processor_id();
     int last_vcpu_id, last_processor;
-
-    if (!is_idle_domain(prev->domain))
+    struct last_domain_entry *last_domain_cpu =
+        __ia64_per_cpu_var(last_domain);
+    u32 last_tlbflush_timestamp;
+    bool_t rid_collision = 0;
+    unsigned int i;
+
+    if (!is_idle_domain(prev->domain)) {
+        u32 current_time = tlbflush_current_time();
         tlbflush_update_time
             (&prev->domain->arch.last_vcpu[cpu].tlbflush_timestamp,
-             tlbflush_current_time());
+             current_time);
+        for (i = prev->arch.starting_rid_index;
+             i < prev->arch.ending_rid_index;
+             i++) {
+            last_domain_cpu[i].last_domain = prev->domain;
+            tlbflush_update_time
+                (&last_domain_cpu[i].tlbflush_timestamp,
+                 current_time);
+        }
+    }
 
     if (is_idle_domain(next->domain))
         return;
@@ -103,15 +142,28 @@ static void flush_vtlb_for_context_switc
     next->domain->arch.last_vcpu[cpu].vcpu_id = next->vcpu_id;
     next->arch.last_processor = cpu;
 
-    if ((last_vcpu_id != next->vcpu_id &&
+    last_tlbflush_timestamp =
+        next->domain->arch.last_vcpu[cpu].tlbflush_timestamp;
+    for (i = next->arch.starting_rid_index;
+         i < next->arch.ending_rid_index;
+         i++) {
+        if (last_domain_cpu[i].last_domain != NULL &&
+            last_domain_cpu[i].last_domain != next->domain &&
+            NEED_FLUSH(last_tlbflush_timestamp,
+                       last_domain_cpu[i].tlbflush_timestamp) &&
+            NEED_FLUSH(__get_cpu_var(tlbflush_time),
+                       last_domain_cpu[i].tlbflush_timestamp)) {
+            rid_collision = 1;
+            break;
+        }
+    }
+
+    if (rid_collision ||
+        (last_vcpu_id != next->vcpu_id &&
          last_vcpu_id != INVALID_VCPU_ID) ||
         (last_vcpu_id == next->vcpu_id &&
          last_processor != cpu &&
          last_processor != INVALID_PROCESSOR)) {
-#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
-        u32 last_tlbflush_timestamp =
-            next->domain->arch.last_vcpu[cpu].tlbflush_timestamp;
-#endif
         int vhpt_is_flushed = 0;
 
         // if the vTLB implementation was changed,
@@ -128,8 +180,9 @@ static void flush_vtlb_for_context_switc
                 vhpt_is_flushed = 1;
             }
         }
-        if (vhpt_is_flushed || NEED_FLUSH(__get_cpu_var(tlbflush_time),
-                                          last_tlbflush_timestamp)) {
+        if (vhpt_is_flushed || rid_collision ||
+            NEED_FLUSH(__get_cpu_var(tlbflush_time),
+                       last_tlbflush_timestamp)) {
             local_flush_tlb_all();
             perfc_incr(tlbflush_clock_cswitch_purge);
         } else {
@@ -433,6 +486,8 @@ int vcpu_initialise(struct vcpu *v)
            this case we don't want to share rid among vcpus, but we
            may do it if two vcpus are on the same cpu... */
 
+        v->arch.starting_rid_index = d->arch.starting_rid_index;
+        v->arch.ending_rid_index = d->arch.ending_rid_index;
         v->arch.starting_rid = d->arch.starting_rid;
        v->arch.ending_rid = d->arch.ending_rid;
         v->arch.breakimm = d->arch.breakimm;
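
The mechanism added to domain.c above, stated in isolation: every physical CPU
keeps a small table with one entry per RID block, recording which domain last
ran in that block and at which tlbflush-clock time. When a vcpu is switched
out, its domain's blocks are stamped; when a vcpu is switched in, its blocks
are scanned and a full TLB flush is forced only if some other domain has used
one of them since this domain was last flushed on that CPU. The following is a
minimal, self-contained sketch of that bookkeeping, not the Xen code itself:
the types are simplified, the helper names rid_switch_out/rid_switch_in are
made up, and Xen's wrap-around-safe NEED_FLUSH() comparison is reduced to a
plain '>' on a monotonic counter.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define MAX_RID_BLOCKS 64              /* 2^(24 - 18) blocks, as in the patch */

/* Simplified stand-ins for struct domain and the per-cpu table. */
struct dom {
    int id;
};

struct last_owner {
    struct dom *last_domain;           /* domain that last ran in this block */
    uint32_t stamp;                    /* tlbflush clock value at that time  */
};

/* One table per physical CPU; this single-CPU sketch keeps just one. */
static struct last_owner last_owner[MAX_RID_BLOCKS];
static uint32_t tlbflush_clock;        /* stands in for the tlbflush clock   */

/* A domain owning RID blocks [start, end) is being switched out: stamp its
 * blocks so later switch-ins can tell whether they were reused. */
static void rid_switch_out(struct dom *prev, unsigned start, unsigned end)
{
    for (unsigned i = start; i < end; i++) {
        last_owner[i].last_domain = prev;
        last_owner[i].stamp = tlbflush_clock;
    }
}

/* Decide whether the incoming domain needs a TLB flush: only if some other
 * domain touched one of its blocks after the incoming domain last ran here. */
static bool rid_switch_in(struct dom *next, unsigned start, unsigned end,
                          uint32_t next_last_run_stamp)
{
    for (unsigned i = start; i < end; i++) {
        const struct last_owner *e = &last_owner[i];
        if (e->last_domain != NULL && e->last_domain != next &&
            e->stamp > next_last_run_stamp)
            return true;               /* RID collision: flush before running */
    }
    return false;
}

int main(void)
{
    struct dom d1 = { 1 }, d2 = { 2 };

    tlbflush_clock = 1;
    rid_switch_out(&d1, 8, 10);        /* d1 leaves, owning blocks 8..9  */
    tlbflush_clock = 2;
    rid_switch_out(&d2, 8, 12);        /* d2 later ran in blocks 8..11   */

    /* d1 returns; it last ran at stamp 1 and d2 reused block 8 at stamp 2,
     * so a flush is required. */
    return rid_switch_in(&d1, 8, 10, 1) ? 0 : 1;
}

The point of the design is that sharing a RID block is harmless as long as the
block's previous user has not run on this CPU since the incoming domain's last
flush; only a genuine collision costs a local_flush_tlb_all().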
diff -r 22101e2f73b6 -r d9255a174eef xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c    Mon May 14 11:33:22 2007 +0900
+++ b/xen/arch/ia64/xen/regionreg.c    Mon May 14 16:23:29 2007 +0900
@@ -23,25 +23,15 @@ extern void ia64_new_rr7(unsigned long r
    than the host and the host rid space is shared among the domains.
    (Values in parenthesis are usual default values).
 
-   The host rid space is partitionned into MAX_RID_BLOCKS (= 64)
+   The host rid space is partitionned into IA64_MAX_RID_BLOCKS (= 64)
    blocks of 2**IA64_MIN_IMPL_RID_BITS (= 18) rids.  The first block is also
-   partitionned into MAX_RID_BLOCKS small blocks.  Small blocks are used for
-   metaphysical rids.  Small block 0 can't be allocated and is reserved for
+   partitionned into IA64_MAX_RID_BLOCKS small blocks.  Small blocks are used
+   for metaphysical rids.  Small block 0 can't be allocated and is reserved for
    Xen own rids during boot.
 
    Blocks and small blocks are allocated together and a domain may
    have one or more consecutive blocks (and small blocks).  */
-
-/* Minimum number of RID bits for a domain.  The current value is 18, which is
-   the minimum defined by the itanium architecture, but it can be lowered
-   to increase the number of domain. */
-#define IA64_MIN_IMPL_RID_BITS  (IA64_MIN_IMPL_RID_MSB+1)
-/* Maximum number of RID bits.  This is definitly 24.  */
-#define IA64_MAX_IMPL_RID_BITS  24
-
-/* Maximum number of blocks.  */
-#define MAX_RID_BLOCKS  (1 << (IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS))
 
 /* Default number of rid bits for domains.  */
 static unsigned int domain_rid_bits_default = IA64_MIN_IMPL_RID_BITS;
@@ -85,7 +75,8 @@ static unsigned long allocate_metaphysic
 static int implemented_rid_bits = 0;
 static int mp_rid_shift;
 
-static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 };
+static unsigned int ridblock_owner[IA64_MAX_RID_BLOCKS] =
+    { [0 ... (IA64_MAX_RID_BLOCKS - 1)] = 0 };
 
 void init_rid_allocator (void)
 {
@@ -130,6 +121,8 @@ void init_rid_allocator (void)
 
     mp_rid_shift = IA64_MIN_IMPL_RID_BITS - log_blocks;
     BUG_ON (mp_rid_shift < 3);
+
+    init_context_switch();
 }
 
 
@@ -140,6 +133,8 @@ int allocate_rid_range(struct domain *d,
 int allocate_rid_range(struct domain *d, unsigned long ridbits)
 {
     int i, j, n_rid_blocks;
+    unsigned int min_domains = UINT_MAX;
+    unsigned int rid_start;
 
     if (ridbits == 0)
         ridbits = domain_rid_bits_default;
@@ -154,36 +149,49 @@ int allocate_rid_range(struct domain *d,
     n_rid_blocks = 1UL << (ridbits - IA64_MIN_IMPL_RID_BITS);
 
     // skip over block 0, reserved for "meta-physical mappings (and Xen)"
-    for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) {
-        if (ridblock_owner[i] == NULL) {
-            for (j = i; j < i + n_rid_blocks; ++j) {
-                if (ridblock_owner[j]) {
-                    ++j;
-                    break;
-                }
+    rid_start = n_rid_blocks;
+    for (i = n_rid_blocks; i < IA64_MAX_RID_BLOCKS; i += n_rid_blocks) {
+        unsigned int domains = 0;
+        if (ridblock_owner[i] == 0) {
+            for (j = i; j < i + n_rid_blocks; ++j)
+                domains += ridblock_owner[j];
+
+            /*
+             * Heuristic. Try to guess the least used rids.
+             * This is based on the number of domains.
+             * Another candidate would be one based on the number
+             * of vcpus.
+             * Better heuristic?
+             * i.e. What is better definition of
+             *      'the least used rid'?
+             */
+            if (domains < min_domains) {
+                min_domains = domains;
+                rid_start = i;
            }
-            --j;
-            if (ridblock_owner[j] == NULL)
+            if (min_domains == 0)
                 break;
         }
     }
-
-    if (i >= MAX_RID_BLOCKS)
-        return 0;
 
     // found an unused block:
     //   (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits)
     // mark this block as owned
-    for (j = i; j < i + n_rid_blocks; ++j)
-        ridblock_owner[j] = d;
+    for (j = rid_start; j < rid_start + n_rid_blocks; ++j)
+        ridblock_owner[j]++;
 
     // setup domain struct
     d->arch.rid_bits = ridbits;
-    d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS;
-    d->arch.ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
-
-    d->arch.starting_mp_rid = i << mp_rid_shift;
-    d->arch.ending_mp_rid = (i + 1) << mp_rid_shift;
+
+    d->arch.starting_rid_index = rid_start;
+    d->arch.ending_rid_index = rid_start + n_rid_blocks;
+
+    d->arch.starting_rid = rid_start << IA64_MIN_IMPL_RID_BITS;
+    d->arch.ending_rid =
+        (rid_start + n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
+
+    d->arch.starting_mp_rid = rid_start << mp_rid_shift;
+    d->arch.ending_mp_rid = (rid_start + 1) << mp_rid_shift;
 
     d->arch.metaphysical_rr0 = allocate_metaphysical_rr(d, 0);
     d->arch.metaphysical_rr4 = allocate_metaphysical_rr(d, 1);
@@ -207,10 +215,8 @@ int deallocate_rid_range(struct domain *
         return 1;
 
-    for (i = rid_block_start; i < rid_block_end; ++i) {
-        ASSERT(ridblock_owner[i] == d);
-        ridblock_owner[i] = NULL;
-    }
+    for (i = rid_block_start; i < rid_block_end; ++i)
+        ridblock_owner[i]--;
 
     d->arch.rid_bits = 0;
     d->arch.starting_rid = 0;
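
To put numbers on the comment above: with IA64_MAX_IMPL_RID_BITS = 24 and
IA64_MIN_IMPL_RID_BITS = 18 the host RID space splits into 2^(24-18) = 64
blocks of 2^18 RIDs, block 0 stays reserved, and a domain given blocks
[rid_start, rid_start + n) ends up with starting_rid = rid_start << 18 and
ending_rid = (rid_start + n) << 18. Since ridblock_owner[] is now a per-block
count of owning domains rather than a single owner pointer, allocation no
longer fails when every block is taken; it picks the run of blocks with the
fewest owners. A small standalone sketch of that selection follows (the helper
name pick_rid_blocks is made up, and the real allocate_rid_range() additionally
only considers runs whose first block is currently free):

#include <limits.h>
#include <stdio.h>

#define MIN_RID_BITS   18                      /* IA64_MIN_IMPL_RID_BITS */
#define MAX_RID_BITS   24                      /* IA64_MAX_IMPL_RID_BITS */
#define MAX_RID_BLOCKS (1u << (MAX_RID_BITS - MIN_RID_BITS))    /* = 64  */

/* owners[i] counts how many domains currently share RID block i,
 * like the new unsigned ridblock_owner[] in the patch. */
static unsigned owners[MAX_RID_BLOCKS];

/* Return the start of the least-loaded run of n_blocks blocks, skipping
 * block 0 (reserved for metaphysical/Xen rids).  This mirrors the
 * "fewest owning domains" heuristic of allocate_rid_range(). */
static unsigned pick_rid_blocks(unsigned n_blocks)
{
    unsigned best = n_blocks, best_load = UINT_MAX;

    for (unsigned i = n_blocks; i < MAX_RID_BLOCKS; i += n_blocks) {
        unsigned load = 0;
        for (unsigned j = i; j < i + n_blocks; j++)
            load += owners[j];
        if (load < best_load) {
            best_load = load;
            best = i;
        }
        if (best_load == 0)                    /* a free run: stop early */
            break;
    }
    return best;
}

int main(void)
{
    unsigned start = pick_rid_blocks(1);       /* one-block domain       */

    for (unsigned j = start; j < start + 1; j++)
        owners[j]++;                           /* mark the block owned   */

    printf("blocks [%u,%u) -> rids [%#lx,%#lx)\n",
           start, start + 1,
           (unsigned long)start << MIN_RID_BITS,
           ((unsigned long)start + 1) << MIN_RID_BITS);
    return 0;
}

deallocate_rid_range() is the mirror image: it now simply decrements the
per-block counts instead of clearing an owner pointer.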
diff -r 22101e2f73b6 -r d9255a174eef xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h    Mon May 14 11:33:22 2007 +0900
+++ b/xen/include/asm-ia64/domain.h    Mon May 14 16:23:29 2007 +0900
@@ -23,6 +23,7 @@ extern void relinquish_vcpu_resources(st
 extern void relinquish_vcpu_resources(struct vcpu *v);
 extern void vcpu_share_privregs_with_guest(struct vcpu *v);
 extern int vcpu_late_initialise(struct vcpu *v);
+extern void init_context_switch(void);
 
 /* given a current domain metaphysical address, return the physical address */
 extern unsigned long translate_domain_mpaddr(unsigned long mpaddr,
@@ -92,6 +93,9 @@ struct arch_domain {
     /* Big range. */
     int starting_rid;       /* first RID assigned to domain */
     int ending_rid;         /* one beyond highest RID assigned to domain */
+    /* for context switch */
+    int starting_rid_index;
+    int ending_rid_index;
     /* Metaphysical range. */
     int starting_mp_rid;
     int ending_mp_rid;
@@ -179,6 +183,8 @@ struct arch_vcpu {
     int breakimm;           // from arch_domain (so is pinned)
     int starting_rid;       /* first RID assigned to domain */
     int ending_rid;         /* one beyond highest RID assigned to domain */
+    int starting_rid_index; // from arch_domain (so is pinned)
+    int ending_rid_index;   // from arch_domain (so is pinned)
 
     struct thread_struct _thread;   // this must be last
 
diff -r 22101e2f73b6 -r d9255a174eef xen/include/asm-ia64/flushtlb.h
--- a/xen/include/asm-ia64/flushtlb.h    Mon May 14 11:33:22 2007 +0900
+++ b/xen/include/asm-ia64/flushtlb.h    Mon May 14 16:23:29 2007 +0900
@@ -73,7 +73,7 @@ DECLARE_PER_CPU(volatile u32, vhpt_tlbfl
 #define tlbflush_current_time() (0)
 #define tlbflush_clock_inc_and_return() (0)
 #define tlbflush_update_time(time, timestamp) do {(void)timestamp;} while (0)
-#define NEED_FLUSH(obj_stamp, lastuse_stamp) (1)
+#define NEED_FLUSH(obj_stamp, lastuse_stamp) ({(void)lastuse_stamp; 1;})
 
 #endif /* CONFIG_XEN_IA64_TLBFLUSH_CLOCK */
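
The flushtlb.h change above is a small piece of build hygiene that the rest of
the patch relies on: flush_vtlb_for_context_switch() now declares and assigns
last_tlbflush_timestamp unconditionally (the old #ifdef
CONFIG_XEN_IA64_TLBFLUSH_CLOCK block is gone), and the stubbed NEED_FLUSH()
becomes a GCC statement expression that still evaluates its lastuse_stamp
argument while returning 1, presumably so the surrounding variables remain
referenced when the tlbflush clock is compiled out. A tiny standalone
illustration of the idiom (hypothetical macro name; GCC/Clang only, since
statement expressions are a compiler extension):

#include <stdio.h>

/* Clock compiled out: always say "flush", but still evaluate the
 * timestamp argument so it does not end up unreferenced. */
#define NEED_FLUSH_STUB(obj_stamp, lastuse_stamp) \
    ({ (void)(lastuse_stamp); 1; })

int main(void)
{
    unsigned int last_stamp = 42;   /* only ever fed to the stub */

    if (NEED_FLUSH_STUB(0u, last_stamp))
        printf("flush\n");
    return 0;
}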
diff -r 22101e2f73b6 -r d9255a174eef xen/include/asm-ia64/regionreg.h
--- a/xen/include/asm-ia64/regionreg.h    Mon May 14 11:33:22 2007 +0900
+++ b/xen/include/asm-ia64/regionreg.h    Mon May 14 16:23:29 2007 +0900
@@ -1,8 +1,21 @@
 #ifndef _REGIONREG_H_
 #define _REGIONREG_H_
 
-#define XEN_DEFAULT_RID         7
 #define IA64_MIN_IMPL_RID_MSB   17
+
+/* Minimum number of RID bits for a domain.  The current value is 18, which is
+   the minimum defined by the itanium architecture, but it can be lowered
+   to increase the number of domain. */
+#define IA64_MIN_IMPL_RID_BITS  (IA64_MIN_IMPL_RID_MSB+1)
+
+/* Maximum number of RID bits.  This is definitly 24.  */
+#define IA64_MAX_IMPL_RID_BITS  24
+
+/* Maximum number of blocks.  */
+#define IA64_MAX_RID_BLOCKS     \
+    (1 << (IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS))
+
+
 #define _REGION_ID(x)           ({ia64_rr _v; _v.rrval = (long)(x); _v.rid;})
 #define _REGION_PAGE_SIZE(x)    ({ia64_rr _v; _v.rrval = (long)(x); _v.ps;})
 #define _REGION_HW_WALKER(x)    ({ia64_rr _v; _v.rrval = (long)(x); _v.ve;})
@@ -23,17 +36,17 @@ typedef union ia64_rr {
 //
 // region register macros
 //
-#define RR_TO_VE(arg)   (((arg) >> 0) & 0x0000000000000001)
-#define RR_VE(arg)      (((arg) & 0x0000000000000001) << 0)
-#define RR_VE_MASK      0x0000000000000001L
+#define RR_TO_VE(arg)   (((arg) >> 0) & 0x0000000000000001UL)
+#define RR_VE(arg)      (((arg) & 0x0000000000000001UL) << 0)
+#define RR_VE_MASK      0x0000000000000001UL
 #define RR_VE_SHIFT     0
-#define RR_TO_PS(arg)   (((arg) >> 2) & 0x000000000000003f)
-#define RR_PS(arg)      (((arg) & 0x000000000000003f) << 2)
-#define RR_PS_MASK      0x00000000000000fcL
+#define RR_TO_PS(arg)   (((arg) >> 2) & 0x000000000000003fUL)
+#define RR_PS(arg)      (((arg) & 0x000000000000003fUL) << 2)
+#define RR_PS_MASK      0x00000000000000fcUL
 #define RR_PS_SHIFT     2
-#define RR_TO_RID(arg)  (((arg) >> 8) & 0x0000000000ffffff)
-#define RR_RID(arg)     (((arg) & 0x0000000000ffffff) << 8)
-#define RR_RID_MASK     0x00000000ffffff00L
+#define RR_TO_RID(arg)  (((arg) >> 8) & 0x0000000000ffffffUL)
+#define RR_RID(arg)     (((arg) & 0x0000000000ffffffUL) << 8)
+#define RR_RID_MASK     0x00000000ffffff00UL
 
 int set_one_rr(unsigned long rr, unsigned long val);
@@ -52,17 +65,17 @@ vmMangleRID(unsigned long RIDVal)
 {
     union bits64 {
         unsigned char bytes[4];
-        unsigned long uint;
+        unsigned long ulong;
     };
     union bits64 t;
     unsigned char tmp;
 
-    t.uint = RIDVal;
+    t.ulong = RIDVal;
     tmp = t.bytes[1];
     t.bytes[1] = t.bytes[3];
     t.bytes[3] = tmp;
-    return t.uint;
+    return t.ulong;
 }
 
 // since vmMangleRID is symmetric, use it for unmangling also
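
Finally, on the vmMangleRID() cleanup above: the routine views the
region-register value as bytes and swaps bytes 1 and 3, and because swapping
the same pair twice is the identity, the function is its own inverse, which is
why the header comment says to reuse it for unmangling. A standalone sketch of
the same byte swap (a plain C99 stand-in using memcpy instead of the union, not
the Xen declaration):

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Same idea as vmMangleRID(): view the 64-bit value as bytes and swap
 * bytes 1 and 3. */
static unsigned long mangle_rid(unsigned long val)
{
    unsigned char bytes[sizeof val];
    unsigned char tmp;

    memcpy(bytes, &val, sizeof val);
    tmp = bytes[1];
    bytes[1] = bytes[3];
    bytes[3] = tmp;
    memcpy(&val, bytes, sizeof val);
    return val;
}

int main(void)
{
    unsigned long rid = 0x00000000deadbeefUL;
    unsigned long mangled = mangle_rid(rid);

    /* Swapping the same byte pair twice restores the input, so mangling
     * is its own inverse. */
    assert(mangle_rid(mangled) == rid);
    printf("%#lx <-> %#lx\n", rid, mangled);
    return 0;
}

The renamed union member (ulong instead of uint) and the UL suffixes on the
RR_* constants follow the commit message's point that region register values
are handled as unsigned long throughout.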