$NetBSD: patch-CVE-2013-1918_9,v 1.1 2013/05/03 16:48:38 drochner Exp $
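
One of the fixes for CVE-2013-1918 (Xen Security Advisory 45): several
long-latency page table operations (page table teardown, new_guest_cr3(),
page table pinning/unpinning, and the mmu_update/mmuext_op hypercalls)
were not preemptible, allowing a malicious PV guest to tie up a physical
CPU. These paths are made preemptible by deferring cleanup of old page
tables via curr->arch.old_guest_table and restarting the operation with
hypercall_create_continuation().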

--- xen/arch/x86/mm.c.orig	2013-05-03 13:38:09.000000000 +0000
+++ xen/arch/x86/mm.c
@@ -1183,7 +1183,16 @@ static int put_page_from_l3e(l3_pgentry_
 #endif
 
     if ( unlikely(partial > 0) )
+    {
+        ASSERT(preemptible >= 0);
         return __put_page_type(l3e_get_page(l3e), preemptible);
+    }
+
+    if ( preemptible < 0 )
+    {
+        current->arch.old_guest_table = l3e_get_page(l3e);
+        return 0;
+    }
 
     return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
 }
@@ -1196,7 +1205,17 @@ static int put_page_from_l4e(l4_pgentry_
          (l4e_get_pfn(l4e) != pfn) )
     {
         if ( unlikely(partial > 0) )
+        {
+            ASSERT(preemptible >= 0);
             return __put_page_type(l4e_get_page(l4e), preemptible);
+        }
+
+        if ( preemptible < 0 )
+        {
+            current->arch.old_guest_table = l4e_get_page(l4e);
+            return 0;
+        }
+
         return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
     }
     return 1;
@@ -1486,12 +1505,17 @@ static int alloc_l3_table(struct page_in
     if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
     {
         MEM_LOG("Failure in alloc_l3_table: entry %d", i);
+        if ( i )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 0;
+            current->arch.old_guest_table = page;
+        }
         while ( i-- > 0 )
         {
             if ( !is_guest_l3_slot(i) )
                 continue;
             unadjust_guest_l3e(pl3e[i], d);
-            put_page_from_l3e(pl3e[i], pfn, 0, 0);
         }
     }
 
@@ -1521,22 +1545,24 @@ static int alloc_l4_table(struct page_in
             page->nr_validated_ptes = i;
             page->partial_pte = partial ?: 1;
         }
-        else if ( rc == -EINTR )
+        else if ( rc < 0 )
         {
+            if ( rc != -EINTR )
+                MEM_LOG("Failure in alloc_l4_table: entry %d", i);
             if ( i )
             {
                 page->nr_validated_ptes = i;
                 page->partial_pte = 0;
-                rc = -EAGAIN;
+                if ( rc == -EINTR )
+                    rc = -EAGAIN;
+                else
+                {
+                    if ( current->arch.old_guest_table )
+                        page->nr_validated_ptes++;
+                    current->arch.old_guest_table = page;
+                }
             }
         }
-        else if ( rc < 0 )
-        {
-            MEM_LOG("Failure in alloc_l4_table: entry %d", i);
-            while ( i-- > 0 )
-                if ( is_guest_l4_slot(d, i) )
-                    put_page_from_l4e(pl4e[i], pfn, 0, 0);
-        }
         if ( rc < 0 )
             return rc;
 
@@ -1966,7 +1992,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
         pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
     }
 
-    put_page_from_l3e(ol3e, pfn, 0, 0);
+    put_page_from_l3e(ol3e, pfn, 0, -preemptible);
     return rc;
 }
 
@@ -2029,7 +2055,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
         return -EFAULT;
     }
 
-    put_page_from_l4e(ol4e, pfn, 0, 0);
+    put_page_from_l4e(ol4e, pfn, 0, -preemptible);
     return rc;
 }
 
@@ -2187,7 +2213,15 @@ static int alloc_page_type(struct page_i
                 PRtype_info ": caf=%08lx taf=%" PRtype_info,
                 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
                 type, page->count_info, page->u.inuse.type_info);
-        page->u.inuse.type_info = 0;
+        if ( page != current->arch.old_guest_table )
+            page->u.inuse.type_info = 0;
+        else
+        {
+            ASSERT((page->u.inuse.type_info &
+                    (PGT_count_mask | PGT_validated)) == 1);
+            get_page_light(page);
+            page->u.inuse.type_info |= PGT_partial;
+        }
     }
     else
     {
@@ -2725,49 +2759,150 @@ static void put_superpage(unsigned long
 
 #endif
 
+static int put_old_guest_table(struct vcpu *v)
+{
+    int rc;
+
+    if ( !v->arch.old_guest_table )
+        return 0;
+
+    switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) )
+    {
+    case -EINTR:
+    case -EAGAIN:
+        return -EAGAIN;
+    }
+
+    v->arch.old_guest_table = NULL;
+
+    return rc;
+}
+
+int vcpu_destroy_pagetables(struct vcpu *v)
+{
+    unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
+    struct page_info *page;
+    int rc = put_old_guest_table(v);
+
+    if ( rc )
+        return rc;
+
+#ifdef __x86_64__
+    if ( is_pv_32on64_vcpu(v) )
+        mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn));
+#endif
+
+    if ( mfn )
+    {
+        page = mfn_to_page(mfn);
+        if ( paging_mode_refcounts(v->domain) )
+            put_page(page);
+        else
+            rc = put_page_and_type_preemptible(page, 1);
+    }
+
+#ifdef __x86_64__
+    if ( is_pv_32on64_vcpu(v) )
+    {
+        if ( !rc )
+            l4e_write(
+                (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
+                l4e_empty());
+    }
+    else
+#endif
+    if ( !rc )
+    {
+        v->arch.guest_table = pagetable_null();
+
+#ifdef __x86_64__
+        /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
+        mfn = pagetable_get_pfn(v->arch.guest_table_user);
+        if ( mfn )
+        {
+            page = mfn_to_page(mfn);
+            if ( paging_mode_refcounts(v->domain) )
+                put_page(page);
+            else
+                rc = put_page_and_type_preemptible(page, 1);
+        }
+        if ( !rc )
+            v->arch.guest_table_user = pagetable_null();
+#endif
+    }
+
+    v->arch.cr3 = 0;
+
+    return rc;
+}
 
 int new_guest_cr3(unsigned long mfn)
 {
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
-    int okay;
+    int rc;
     unsigned long old_base_mfn;
 
 #ifdef __x86_64__
     if ( is_pv_32on64_domain(d) )
     {
-        okay = paging_mode_refcounts(d)
-            ? 0 /* Old code was broken, but what should it be? */
-            : mod_l4_entry(
+        rc = paging_mode_refcounts(d)
+             ? -EINVAL /* Old code was broken, but what should it be? */
+             : mod_l4_entry(
                     __va(pagetable_get_paddr(curr->arch.guest_table)),
                     l4e_from_pfn(
                         mfn,
                         (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
-                    pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
-        if ( unlikely(!okay) )
+                    pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr);
+        switch ( rc )
         {
+        case 0:
+            break;
+        case -EINTR:
+        case -EAGAIN:
+            return -EAGAIN;
+        default:
             MEM_LOG("Error while installing new compat baseptr %lx", mfn);
-            return 0;
+            return rc;
         }
 
         invalidate_shadow_ldt(curr, 0);
         write_ptbase(curr);
 
-        return 1;
+        return 0;
     }
 #endif
-    okay = paging_mode_refcounts(d)
-        ? get_page_from_pagenr(mfn, d)
-        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
-    if ( unlikely(!okay) )
+    rc = put_old_guest_table(curr);
+    if ( unlikely(rc) )
+        return rc;
+
+    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
+    /*
+     * This is particularly important when getting restarted after the
+     * previous attempt got preempted in the put-old-MFN phase.
+     */
+    if ( old_base_mfn == mfn )
     {
-        MEM_LOG("Error while installing new baseptr %lx", mfn);
+        write_ptbase(curr);
         return 0;
     }
 
-    invalidate_shadow_ldt(curr, 0);
+    rc = paging_mode_refcounts(d)
+         ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL)
+         : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1);
+    switch ( rc )
+    {
+    case 0:
+        break;
+    case -EINTR:
+    case -EAGAIN:
+        return -EAGAIN;
+    default:
+        MEM_LOG("Error while installing new baseptr %lx", mfn);
+        return rc;
+    }
 
-    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
+    invalidate_shadow_ldt(curr, 0);
 
     curr->arch.guest_table = pagetable_from_pfn(mfn);
     update_cr3(curr);
@@ -2776,13 +2911,25 @@ int new_guest_cr3(unsigned long mfn)
 
     if ( likely(old_base_mfn != 0) )
     {
+        struct page_info *page = mfn_to_page(old_base_mfn);
+
         if ( paging_mode_refcounts(d) )
-            put_page(mfn_to_page(old_base_mfn));
+            put_page(page);
         else
-            put_page_and_type(mfn_to_page(old_base_mfn));
+            switch ( rc = put_page_and_type_preemptible(page, 1) )
+            {
+            case -EINTR:
+                rc = -EAGAIN;
+            case -EAGAIN:
+                curr->arch.old_guest_table = page;
+                break;
+            default:
+                BUG_ON(rc);
+                break;
+            }
     }
 
-    return 1;
+    return rc;
 }
 
 static struct domain *get_pg_owner(domid_t domid)
@@ -2911,12 +3058,29 @@ long do_mmuext_op(
     unsigned int foreigndom)
 {
     struct mmuext_op op;
-    int rc = 0, i = 0, okay;
     unsigned long type;
-    unsigned int done = 0;
+    unsigned int i = 0, done = 0;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct domain *pg_owner;
+    int okay, rc = put_old_guest_table(curr);
+
+    if ( unlikely(rc) )
+    {
+        if ( likely(rc == -EAGAIN) )
+            rc = hypercall_create_continuation(
+                     __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
+                     foreigndom);
+        return rc;
+    }
+
+    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
+         likely(guest_handle_is_null(uops)) )
+    {
+        /* See the curr->arch.old_guest_table related
+         * hypercall_create_continuation() below. */
+        return (int)foreigndom;
+    }
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -2941,7 +3105,7 @@ long do_mmuext_op(
 
     for ( i = 0; i < count; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
         {
             rc = -EAGAIN;
             break;
@@ -3001,21 +3165,17 @@ long do_mmuext_op(
             page = mfn_to_page(mfn);
 
             if ( (rc = xsm_memory_pin_page(d, page)) != 0 )
-            {
-                put_page_and_type(page);
                 okay = 0;
-                break;
-            }
-
-            if ( unlikely(test_and_set_bit(_PGT_pinned,
-                                           &page->u.inuse.type_info)) )
+            else if ( unlikely(test_and_set_bit(_PGT_pinned,
+                                                &page->u.inuse.type_info)) )
             {
                 MEM_LOG("Mfn %lx already pinned", mfn);
-                put_page_and_type(page);
                 okay = 0;
-                break;
             }
 
+            if ( unlikely(!okay) )
+                goto pin_drop;
+
             /* A page is dirtied when its pin status is set. */
             paging_mark_dirty(pg_owner, mfn);
 
@@ -3029,7 +3189,13 @@ long do_mmuext_op(
                                                &page->u.inuse.type_info));
                 spin_unlock(&pg_owner->page_alloc_lock);
                 if ( drop_ref )
-                    put_page_and_type(page);
+                {
+                pin_drop:
+                    if ( type == PGT_l1_page_table )
+                        put_page_and_type(page);
+                    else
+                        curr->arch.old_guest_table = page;
+                }
             }
 
             break;
@@ -3059,7 +3225,17 @@ long do_mmuext_op(
                 break;
             }
 
-            put_page_and_type(page);
+            switch ( rc = put_page_and_type_preemptible(page, 1) )
+            {
+            case -EINTR:
+            case -EAGAIN:
+                curr->arch.old_guest_table = page;
+                rc = 0;
+                break;
+            default:
+                BUG_ON(rc);
+                break;
+            }
             put_page(page);
 
             /* A page is dirtied when its pin status is cleared. */
@@ -3069,7 +3245,8 @@ long do_mmuext_op(
         }
 
         case MMUEXT_NEW_BASEPTR:
-            okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn));
+            rc = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn));
+            okay = !rc;
             break;
 
 #ifdef __x86_64__
@@ -3077,29 +3254,55 @@ long do_mmuext_op(
             unsigned long old_mfn, mfn;
 
             mfn = gmfn_to_mfn(d, op.arg1.mfn);
+            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+            /*
+             * This is particularly important when getting restarted after the
+             * previous attempt got preempted in the put-old-MFN phase.
+             */
+            if ( old_mfn == mfn )
+                break;
+
             if ( mfn != 0 )
             {
                 if ( paging_mode_refcounts(d) )
                     okay = get_page_from_pagenr(mfn, d);
                 else
-                    okay = !get_page_and_type_from_pagenr(
-                        mfn, PGT_root_page_table, d, 0, 0);
+                {
+                    rc = get_page_and_type_from_pagenr(
+                        mfn, PGT_root_page_table, d, 0, 1);
+                    okay = !rc;
+                }
                 if ( unlikely(!okay) )
                 {
-                    MEM_LOG("Error while installing new mfn %lx", mfn);
+                    if ( rc == -EINTR )
+                        rc = -EAGAIN;
+                    else if ( rc != -EAGAIN )
+                        MEM_LOG("Error while installing new mfn %lx", mfn);
                     break;
                 }
             }
 
-            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
             curr->arch.guest_table_user = pagetable_from_pfn(mfn);
 
             if ( old_mfn != 0 )
             {
+                struct page_info *page = mfn_to_page(old_mfn);
+
                 if ( paging_mode_refcounts(d) )
-                    put_page(mfn_to_page(old_mfn));
+                    put_page(page);
                 else
-                    put_page_and_type(mfn_to_page(old_mfn));
+                    switch ( rc = put_page_and_type_preemptible(page, 1) )
+                    {
+                    case -EINTR:
+                        rc = -EAGAIN;
+                    case -EAGAIN:
+                        curr->arch.old_guest_table = page;
+                        okay = 0;
+                        break;
+                    default:
+                        BUG_ON(rc);
+                        break;
+                    }
             }
 
             break;
@@ -3338,9 +3541,27 @@ long do_mmuext_op(
     }
 
     if ( rc == -EAGAIN )
+    {
+        ASSERT(i < count);
         rc = hypercall_create_continuation(
             __HYPERVISOR_mmuext_op, "hihi",
             uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+    }
+    else if ( curr->arch.old_guest_table )
+    {
+        XEN_GUEST_HANDLE(void) null;
+
+        ASSERT(rc || i == count);
+        set_xen_guest_handle(null, NULL);
+        /*
+         * In order to have a way to communicate the final return value to
+         * our continuation, we pass this in place of "foreigndom", building
+         * on the fact that this argument isn't needed anymore.
+         */
+        rc = hypercall_create_continuation(
+                __HYPERVISOR_mmuext_op, "hihi", null,
+                MMU_UPDATE_PREEMPTED, null, rc);
+    }
 
     put_pg_owner(pg_owner);
 
@@ -3367,11 +3588,28 @@ long do_mmu_update(
     void *va;
     unsigned long gpfn, gmfn, mfn;
     struct page_info *page;
-    int rc = 0, okay = 1, i = 0;
-    unsigned int cmd, done = 0, pt_dom;
-    struct vcpu *v = current;
+    unsigned int cmd, i = 0, done = 0, pt_dom;
+    struct vcpu *curr = current, *v = curr;
     struct domain *d = v->domain, *pt_owner = d, *pg_owner;
     struct domain_mmap_cache mapcache;
+    int rc = put_old_guest_table(curr), okay = 1;
+
+    if ( unlikely(rc) )
+    {
+        if ( likely(rc == -EAGAIN) )
+            rc = hypercall_create_continuation(
+                     __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
+                     foreigndom);
+        return rc;
+    }
+
+    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
+         likely(guest_handle_is_null(ureqs)) )
+    {
+        /* See the curr->arch.old_guest_table related
+         * hypercall_create_continuation() below. */
+        return (int)foreigndom;
+    }
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -3420,7 +3658,7 @@ long do_mmu_update(
 
     for ( i = 0; i < count; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
        {
             rc = -EAGAIN;
             break;
@@ -3685,9 +3923,27 @@ long do_mmu_update(
     }
 
     if ( rc == -EAGAIN )
+    {
+        ASSERT(i < count);
         rc = hypercall_create_continuation(
             __HYPERVISOR_mmu_update, "hihi",
             ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+    }
+    else if ( curr->arch.old_guest_table )
+    {
+        XEN_GUEST_HANDLE(void) null;
+
+        ASSERT(rc || i == count);
+        set_xen_guest_handle(null, NULL);
+        /*
+         * In order to have a way to communicate the final return value to
+         * our continuation, we pass this in place of "foreigndom", building
+         * on the fact that this argument isn't needed anymore.
+         */
+        rc = hypercall_create_continuation(
+                __HYPERVISOR_mmu_update, "hihi", null,
+                MMU_UPDATE_PREEMPTED, null, rc);
+    }
 
     put_pg_owner(pg_owner);