Added copy_vmas() function.

Sorting out return_from_fork() and kernel stack setup for the child.
Bahadir Balban
2008-08-21 12:18:38 +03:00
parent 69db3a04c0
commit dada3e0b2c
5 changed files with 114 additions and 427 deletions

View File

@@ -60,6 +60,11 @@ int thread_start(struct task_ids *ids)
return -EINVAL;
}
int setup_new_ktcb(struct ktcb *new, struct ktcb *orig)
{
/* Setup new thread stack */
new->context.sp =
}
/*
* Creates a thread, with a new thread id, and depending on the flags,
@@ -112,6 +117,10 @@ out:
waitqueue_head_init(&new->wqh_send);
waitqueue_head_init(&new->wqh_recv);
/* When space is copied, kernel-side tcb and stack are also copied */
if (flags == THREAD_CREATE_COPYSPC)
setup_new_ktcb(new, task);
/* Add task to global hlist of tasks */
add_task_global(new);
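Note on setup_new_ktcb() above: the assignment to new->context.sp is left unfinished in this commit (the message says the kernel stack setup is still being sorted out). A minimal sketch of one plausible completion, assuming the kernel stack shares its allocation with the ktcb so the parent's stack offset can simply be rebased onto the child's copy — this is an illustration, not the author's eventual implementation:

int setup_new_ktcb(struct ktcb *new, struct ktcb *orig)
{
        /*
         * Hypothetical completion: keep the same offset into the kernel
         * stack, but relative to the child's own ktcb allocation.
         * Assumes ktcb and kernel stack live in the same block.
         */
        new->context.sp = (unsigned long)new +
                          (orig->context.sp - (unsigned long)orig);
        return 0;
}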

View File

@@ -122,7 +122,8 @@ struct vm_pager {
struct vm_object {
int npages; /* Number of pages in memory */
int refcnt; /* Number of shadows (or vmas) that refer to this object */
struct list_head shadowers; /* List of links to the vm object that shadows this one */
struct list_head shref; /* Shadow reference from original object */
struct list_head shadowers; /* List of vm objects that shadow this one */
struct vm_object *orig_obj; /* Original object that this one shadows */
unsigned int flags; /* Defines the type and flags of the object */
struct list_head list; /* List of all vm objects in memory */
@@ -142,13 +143,6 @@ struct vm_file {
/* To create per-vma vm_object lists */
struct vm_obj_link {
struct list_head list;
/*
* Ref to shadowers by original objects. This could be in the shadow
* object itself, but then we would not be able to reach its link
* when trying to free it.
*/
struct list_head shref;
struct vm_object *obj;
};
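The net effect of this header change is that the shadow back-reference moves from the per-vma link (vm_obj_link) into the vm_object itself, so a shadow can be reached from its original object no matter which vma maps it. A small sketch of how the two new list heads are expected to be used together, mirroring the copy_on_write() and vma_merge_link() hunks later in this commit (the helper name is hypothetical):

/*
 * Hypothetical helper illustrating the new fields: make `shadow`
 * shadow `orig`. Mirrors what copy_on_write() does further below.
 */
static void vm_object_set_shadow(struct vm_object *shadow, struct vm_object *orig)
{
        shadow->orig_obj = orig;                     /* object we shadow */
        list_add(&shadow->shref, &orig->shadowers);  /* orig can reach us back */
}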

View File

@@ -7,42 +7,52 @@
#include <vm_area.h>
#include <task.h>
/*
* Copy all vmas from the given task and populate each with
* links to every object that the original vma is linked to.
* Note that we don't copy vm objects, just the links to
* them, because vm objects are not per-process data.
*/
int copy_vmas(struct tcb *to, struct tcb *from)
{
struct vm_area *vma, new;
struct vm_area *vma, *new_vma;
struct vm_obj_link *vmo_link, *new_link;
list_for_each_entry(vma, &from->vm_area_list, list) {
/* Create a new vma */
new = vma_new(vma->pfn_start, vma->pfn_end - vma->pfn_start,
vma->flags, vma->file_offset);
new_vma = vma_new(vma->pfn_start, vma->pfn_end - vma->pfn_start,
vma->flags, vma->file_offset);
/*
* Populate it with links to every object that the original
* vma is linked to. Note that we don't copy vm objects but
* just the links to them, because vm objects are not
* per-process data.
*/
/* Get the first object on the vma */
BUG_ON(list_empty(&vma->vm_obj_list));
vmo_link = list_entry(vma->vm_obj_list.next,
struct vm_obj_link, list);
do {
/* Create a new link */
new_link = vm_objlink_create();
/* Get the first object, either original file or a shadow */
if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
printf("%s:%s: No vm object in vma!\n",
__TASKNAME__, __FUNCTION__);
BUG();
}
/* Create a new link */
new_link = vm_objlink_create();
/* Copy all fields from original link. E.g. if ori… */
/* Copy object field from original link. */
new_link->obj = vmo_link->obj;
/* Add the new link to vma in object order */
list_add_tail(&new_link->list, &new_vma->vm_obj_list);
/* Continue traversing links, doing the same copying */
} while((vmo_link = vma_next_link(&vmo_link->list,
&vma->vm_obj_list)));
/* All link copying is finished; now add the new vma to the task */
list_add_tail(&new_vma->list, &to->vm_area_list);
}
return 0;
}
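copy_vmas() relies on vma_next_link() both to fetch the first link (by passing the list head itself as the cursor) and to advance through the rest of the chain. That helper is not part of this diff; the following sketch only spells out the semantics the loop above assumes:

/*
 * Assumed semantics (not shown in this commit): return the link that
 * follows `cursor` in the vma's object list, or NULL once the cursor
 * wraps back around to the list head.
 */
struct vm_obj_link *vma_next_link(struct list_head *cursor,
                                  struct list_head *head)
{
        if (cursor->next == head)
                return 0;
        return list_entry(cursor->next, struct vm_obj_link, list);
}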
int copy_tcb(struct tcb *to, struct tcb *from)
{
/* Copy program segments, file descriptors, vm areas */
/* Copy program segment boundary information */
to->start = from->start;
to->end = from->end;
to->text_start = from->text_start;
@@ -62,8 +72,6 @@ int copy_tcb(struct tcb *to, struct tcb *from)
to->map_start = from->map_start;
to->map_end = from->map_end;
/* UTCB ??? */
BUG();
/* Copy all vm areas */
copy_vmas(to, from);
@@ -75,8 +83,8 @@ int copy_tcb(struct tcb *to, struct tcb *from)
int do_fork(struct tcb *parent)
{
struct task_ids ids = { .tid = TASK_ID_INVALID, .spid = parent->spid };
struct tcb *child;
struct task_ids ids = { .tid = TASK_ID_INVALID, .spid = TASK_ID_INVALID };
/*
* Allocate and copy parent pgd + all pmds to child.
@@ -95,20 +103,30 @@ int do_fork(struct tcb *parent)
/* Make all shadows in this task read-only */
vm_freeze_shadows(parent);
/* Create a new L4 thread with parent's page tables copied */
ids.spid = parent->spid;
/*
* Create a new L4 thread with parent's page tables,
* kernel stack and kernel-side tcb copied
*/
child = task_create(&ids, THREAD_CREATE_COPYSPACE);
/* Copy parent tcb to child */
copy_tcb(child, parent);
/* FIXME: Need to copy parent register values to child ??? */
/* Create new utcb for child since it can't use its parent's */
child->utcb = utcb_vaddr_new();
/* Create the utcb shared memory segment, available for the child to shmat() */
if (IS_ERR(shm = shm_new((key_t)child->utcb, __pfn(DEFAULT_UTCB_SIZE)))) {
l4_ipc_return((int)shm);
return 0;
}
/* FIXME: We should munmap() parent's utcb page from child */
/* Notify fs0 about forked process */
vfs_send_fork(parent, child);
/* Start forked child */
l4_thread_start(child);
/* Start forked child. FIXME: Return ipc to child as well ??? */
l4_thread_control(THREAD_START, child);
/* Return back to parent */
l4_ipc_return(0);
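do_fork() above publishes the child's new UTCB as a shared memory segment keyed by the UTCB address ("available for the child to shmat()"). A sketch of what the child-side attach might look like; only the key convention comes from the hunk above, while the helper name and the exact shmget()/shmat() flags are assumptions:

#include <sys/shm.h>

/* Hypothetical child-side counterpart of shm_new() in do_fork(). */
int utcb_attach(void *utcb_vaddr)
{
        int shmid;

        /* Key convention from do_fork(): the UTCB virtual address itself */
        if ((shmid = shmget((key_t)utcb_vaddr, DEFAULT_UTCB_SIZE, 0)) < 0)
                return shmid;

        /* Attach the segment at the UTCB address */
        if (shmat(shmid, utcb_vaddr, 0) == (void *)-1)
                return -1;

        return 0;
}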

View File

@@ -89,7 +89,7 @@ int vma_drop_link(struct vm_obj_link *shadower_link,
* We know shadower is deleted from original's list
* because each shadow can shadow a single object.
*/
list_del(&shadower_link->shref);
list_del(&shadower_link->obj->shref);
/* Delete the original link */
kfree(orig_link);
@@ -125,77 +125,63 @@ int vm_object_is_subset(struct vm_object *copier,
return 1;
}
/* Merges link 1 to link 2 */
int vma_do_merge_link(struct vm_obj_link *link1, struct vm_obj_link *link2)
/*
* When a shadow object is redundant, merges it into the shadow in front of it.
* The caller must already have determined that it is redundant.
*
* vma --> link1 --> link2 --> link3
* | | |
* v v v
* Front Redundant Next
* Shadow Shadow Object (E.g. shadow or file)
*/
int vma_merge_link(struct vm_obj_link *redundant_shlink)
{
struct vm_object *obj1 = link1->obj;
struct vm_object *obj2 = link2->obj;
/* The redundant shadow object */
struct vm_object *redundant = redundant_shlink->obj;
struct vm_object *front; /* Shadow in front of redundant */
struct page *p1, *p2;
/* Move all non-intersecting pages to link2. */
list_for_each_entry(p1, &obj1->page_cache, list) {
/* Page doesn't exist, move it to shadower */
if (!(p2 = find_page(obj2, p1->offset))) {
/* Check refcount is really 1 */
BUG_ON(redundant->refcnt != 1);
/* Get the last shadower object in front */
front = list_entry(redundant->shadowers.next,
struct vm_object, shref);
/* Move all non-intersecting pages to front shadow. */
list_for_each_entry(p1, &redundant->page_cache, list) {
/* Page doesn't exist in front, move it there */
if (!(p2 = find_page(front, p1->offset))) {
list_del(&p1->list);
spin_lock(&p1->lock);
p1->owner = obj2;
p1->owner = front;
spin_unlock(&p1->lock);
insert_page_olist(p1, obj2);
obj2->npages++;
insert_page_olist(p1, front);
front->npages++;
}
}
/* Delete the object along with all its pages. */
vm_object_delete(obj1);
/* Sort out shadow relationships after the merge: */
/* Front won't be a shadow of the redundant shadow anymore */
list_del_init(&front->shref);
/* Check that there really was one shadower of redundant left */
BUG_ON(!list_empty(&redundant->shadowers));
/* Redundant won't be a shadow of its next object */
list_del(&redundant->shref);
/* Front is now a shadow of redundant's next object */
list_add(&front->shref, &redundant->orig_obj->shadowers);
front->orig_obj = redundant->orig_obj;
/* Delete the redundant shadow along with all its pages. */
vm_object_delete(redundant);
/* Delete the last link for the object */
kfree(link1);
return 0;
}
/*
* Finds the only shadower of a vm object, finds the link to
* the object that is in the same vma as the shadower, and
* merges the two in the shadower object, frees the link and
* the original object. Note this must be called when merge
* is decided.
*/
int vma_merge_link(struct vm_object *vmo)
{
struct vm_obj_link *sh_link, *vmo_link;
/* Check refcount */
BUG_ON(vmo->refcnt != 1);
/* Get the last shadower entry */
sh_link = list_entry(vmo->shadowers.next,
struct vm_obj_link, shref);
/* Remove it from original's shadow list */
list_del(&sh_link->shref);
/* Check that there really was one shadower left */
BUG_ON(!list_empty(&vmo_link->obj->shadowers));
/*
* Get the link to vmo that is in the same list as
* the only shadower
*/
vmo_link = list_entry(sh_link->list.next,
struct vm_obj_link, list);
/*
* Check that we got the right link. Since it is
* an ordered list, the link must be the following
* entry after its shadower.
*/
BUG_ON(vmo_link->obj != vmo);
/*
* Now that we got the consecutive links in the
* same vma, do the actual merge.
*/
vma_do_merge_link(vmo_link, sh_link);
kfree(redundant_shlink);
return 0;
}
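For completeness, the end state the rewrite aims for — the "after" picture matching the diagram above, as read from the shref/orig_obj updates. This is an illustration, not code from the commit:

/*
 * Intended state after vma_merge_link(redundant_shlink):
 *
 *   vma --> link1 ----------------> link3
 *            |                       |
 *            v                       v
 *           Front ---- shref ---->  Next
 *           Shadow                  Object (file or another shadow)
 *
 * The front shadow has absorbed the redundant shadow's unique pages,
 * front->orig_obj now points at the next object, front->shref sits on
 * that object's shadowers list, and the redundant shadow plus its link
 * have been deleted.
 */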
@@ -207,7 +193,6 @@ struct vm_obj_link *vm_objlink_create(void)
if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
return PTR_ERR(-ENOMEM);
INIT_LIST_HEAD(&vmo_link->list);
INIT_LIST_HEAD(&vmo_link->shref);
return vmo_link;
}
@@ -221,16 +206,13 @@ struct vm_obj_link *vma_create_shadow(void)
struct vm_object *vmo;
struct vm_obj_link *vmo_link;
/* FIXME: Why not use vm_objlink_create() ??? */
if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
if (IS_ERR(vmo_link = vm_objlink_create()))
return 0;
if (!(vmo = vm_object_create())) {
kfree(vmo_link);
return 0;
}
INIT_LIST_HEAD(&vmo_link->list);
INIT_LIST_HEAD(&vmo_link->shref);
vmo->flags = VM_OBJ_SHADOW;
vmo_link->obj = vmo;
@@ -302,6 +284,7 @@ struct page *copy_on_write(struct fault_data *fault)
if (!(shadow_link = vma_create_shadow()))
return PTR_ERR(-ENOMEM);
dprintf("%s: Created a shadow.\n", __TASKNAME__);
/* Initialise the shadow */
shadow = shadow_link->obj;
shadow->refcnt = 1;
@@ -319,6 +302,9 @@ struct page *copy_on_write(struct fault_data *fault)
*/
list_add(&shadow_link->list, &vma->vm_obj_list);
/* Add object to original's shadower list */
list_add(&shadow->shref, &shadow->orig_obj->shadowers);
/* Add to global object list */
list_add(&shadow->list, &vm_object_list);
@@ -402,7 +388,7 @@ struct page *copy_on_write(struct fault_data *fault)
*/
if ((vmo->refcnt == 1) &&
(vmo->flags != VM_OBJ_FILE))
vma_merge_link(vmo);
vma_merge_link(vmo_link);
}
}
@@ -568,328 +554,6 @@ int vm_freeze_shadows(struct tcb *task)
return 0;
}
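Only the tail of vm_freeze_shadows() appears in this hunk; do_fork() calls it above to make all of the parent's shadow pages read-only, so that the next write from either parent or child faults into copy_on_write() again. A rough sketch of that idea, using the structures from this commit; page_to_phys() is an assumed helper and this is not the actual implementation:

/* Sketch only: demote every resident shadow page of the task to read-only. */
int vm_freeze_shadows(struct tcb *task)
{
        struct vm_area *vma;
        struct vm_obj_link *link;
        struct page *p;

        list_for_each_entry(vma, &task->vm_area_list, list) {
                list_for_each_entry(link, &vma->vm_obj_list, list) {
                        /* Only shadow objects hold privately written pages */
                        if (link->obj->flags != VM_OBJ_SHADOW)
                                continue;
                        /* Remap each in-memory page read-only in the task */
                        list_for_each_entry(p, &link->obj->page_cache, list)
                                l4_map(page_to_phys(p), (void *)p->virtual, 1,
                                       MAP_USR_RO_FLAGS, task->tid);
                }
        }
        return 0;
}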
#if 0
/*
* Old function, likely to be ditched.
*
* For copy-on-write vmas, grows an existing shadow vma, or creates a new one
* for the copy-on-write'ed page. Then adds this shadow vma to the actual vma's
* shadow list. Shadow vmas never overlap with each other, and always overlap
* with part of their original vma.
*/
struct vm_area *copy_on_write_vma(struct fault_data *fault)
{
struct vm_area *shadow;
unsigned long faulty_pfn = __pfn(fault->address);
BUG_ON(faulty_pfn < fault->vma->pfn_start ||
faulty_pfn >= fault->vma->pfn_end);
list_for_each_entry(shadow, &fault->vma->shadow_list, shadow_list) {
if (faulty_pfn == (shadow->pfn_start - 1)) {
/* Growing start of existing shadow vma */
shadow->pfn_start = faulty_pfn;
shadow->f_offset -= 1;
return shadow;
} else if (faulty_pfn == (shadow->pfn_end + 1)) {
/* Growing end of existing shadow vma */
shadow->pfn_end = faulty_pfn;
return shadow;
}
}
/* Otherwise this is a new shadow vma that must be initialised */
shadow = kzalloc(sizeof(struct vm_area));
BUG(); /* This f_offset is wrong. Using uninitialised fields, besides
swap offsets calculate differently */
shadow->f_offset = faulty_pfn - shadow->pfn_start
+ shadow->f_offset;
shadow->pfn_start = faulty_pfn;
shadow->pfn_end = faulty_pfn + 1; /* End pfn is exclusive */
shadow->flags = fault->vma->flags;
/* The vma is owned by the swap file, since it's a private vma */
shadow->owner = fault->task->swap_file;
INIT_LIST_HEAD(&shadow->list);
INIT_LIST_HEAD(&shadow->shadow_list);
/*
* The actual vma uses its shadow_list as the list head for shadows.
* The shadows use their list member, and shadow_list is unused.
*/
list_add(&shadow->list, &fault->vma->shadow_list);
return shadow;
}
/*
* Handles any page ownership change or allocation for file-backed pages.
*/
int do_file_page(struct fault_data *fault)
{
unsigned int reason = fault->reason;
unsigned int vma_flags = fault->vma->flags;
unsigned int pte_flags = vm_prot_flags(fault->kdata->pte);
/* For RO or non-cow WR pages just read in the page */
if (((reason & VM_READ) || ((reason & VM_WRITE) && !(vma_flags & VMA_COW)))
&& (pte_flags & VM_NONE)) {
/* Allocate a new page */
void *paddr = alloc_page(1);
void *vaddr = phys_to_virt(paddr);
struct page *page = phys_to_page(paddr);
unsigned long f_offset = fault_to_file_offset(fault);
/* Map new page at a self virtual address temporarily */
l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
/*
* Read the page. (Simply read into the faulty area that's
* now mapped using a newly allocated page.)
*/
if (fault->vma->owner->pager->ops.read_page(fault->vma->owner,
f_offset,
vaddr) < 0)
BUG();
/* Remove temporary mapping */
l4_unmap(vaddr, 1, self_tid());
/* Map it to task. */
l4_map(paddr, (void *)page_align(fault->address), 1,
(reason & VM_READ) ? MAP_USR_RO_FLAGS : MAP_USR_RW_FLAGS,
fault->task->tid);
spin_lock(&page->lock);
/* Update its page descriptor */
page->count++;
page->owner = fault->vma->owner;
page->f_offset = __pfn(fault->address)
- fault->vma->pfn_start + fault->vma->f_offset;
page->virtual = page_align(fault->address);
/* Add the page to it's owner's list of in-memory pages */
BUG_ON(!list_empty(&page->list));
insert_page_olist(page, page->owner);
spin_unlock(&page->lock);
//printf("%s: Mapped new page @ 0x%x to task: %d\n", __TASKNAME__,
// fault->address, fault->task->tid);
/* Upgrade RO page to non-cow write */
} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)
&& !(vma_flags & VMA_COW)) {
/* The page is mapped in, just update its permission */
l4_map((void *)__pte_to_addr(fault->kdata->pte),
(void *)page_align(fault->address), 1,
MAP_USR_RW_FLAGS, fault->task->tid);
/*
* For cow-write, allocate private pages and create shadow vmas.
*/
} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)
&& (vma_flags & VMA_COW)) {
void *pa = (void *)__pte_to_addr(fault->kdata->pte);
void *new_pa = alloc_page(1);
struct page *page = phys_to_page(pa);
struct page *new_page = phys_to_page(new_pa);
void *va, *new_va;
/* Create or obtain existing shadow vma for the page */
struct vm_area *shadow = copy_on_write_vma(fault);
/* Map new page at a local virtual address temporarily */
new_va = l4_map_helper(new_pa, 1);
/* Map the old page (vmapped for process but not us) to self */
va = l4_map_helper(pa, 1);
/* Copy data from old to new page */
memcpy(new_va, va, PAGE_SIZE);
/* Remove temporary mappings */
l4_unmap(va, 1, self_tid());
l4_unmap(new_va, 1, self_tid());
spin_lock(&page->lock);
/* Clear usage details for original page. */
page->count--;
page->virtual = 0; /* FIXME: Maybe mapped for multiple processes ? */
/* New page is owned by shadow's owner (swap) */
new_page->owner = shadow->owner;
new_page->count++;
new_page->f_offset = __pfn(fault->address)
- shadow->pfn_start + shadow->f_offset;
new_page->virtual = page_align(fault->address);
/* Add the page to it's owner's list of in-memory pages */
BUG_ON(!list_empty(&page->list));
insert_page_olist(page, page->owner);
spin_unlock(&page->lock);
/*
* Overwrite the original file-backed page's mapping on this
* task with the writeable private page. The original physical
* page still exists in memory and can be referenced from its
* associated owner file, but it's not mapped into any virtual
* address anymore in this task.
*/
l4_map(new_pa, (void *)page_align(fault->address), 1,
MAP_USR_RW_FLAGS, fault->task->tid);
} else if ((reason & VM_WRITE) && (pte_flags & VM_NONE)
&& (vma_flags & VMA_COW)) {
struct vm_area *shadow;
/* Allocate a new page */
void *paddr = alloc_page(1);
void *vaddr = phys_to_virt(paddr);
struct page *page = phys_to_page(paddr);
unsigned long f_offset = fault_to_file_offset(fault);
/* Map it to self */
l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
/* Update its page descriptor */
page->count++;
page->owner = fault->vma->owner;
page->f_offset = __pfn(fault->address)
- fault->vma->pfn_start + fault->vma->f_offset;
page->virtual = page_align(fault->address);
/*
* Read the page. (Simply read into the faulty area that's
* now mapped using a newly allocated page.)
*/
if (fault->vma->owner->pager->ops.read_page(fault->vma->owner,
f_offset,
vaddr) < 0)
BUG();
/* Unmap from self */
l4_unmap(vaddr, 1, self_tid());
/* Map to task. */
l4_map(paddr, (void *)page_align(fault->address), 1,
MAP_USR_RW_FLAGS, fault->task->tid);
/* Obtain a shadow vma for the page */
shadow = copy_on_write_vma(fault);
spin_lock(&page->lock);
/* Now anonymise the page by changing its owner file to swap */
page->owner = shadow->owner;
/* Page's offset is different in its new owner. */
page->f_offset = __pfn(fault->address)
- fault->vma->pfn_start + fault->vma->f_offset;
/* Add the page to it's owner's list of in-memory pages */
BUG_ON(!list_empty(&page->list));
insert_page_olist(page, page->owner);
spin_unlock(&page->lock);
} else
BUG();
return 0;
}
/*
* Handles any page allocation or file ownership change for anonymous pages.
* For read accesses initialises a wired-in zero page and for write accesses
* initialises a private ZI page giving its ownership to the swap file.
*/
int do_anon_page(struct fault_data *fault)
{
unsigned int pte_flags = vm_prot_flags(fault->kdata->pte);
void *paddr, *vaddr;
struct page *page;
/* If swapped, read in with vma's pager (swap in anon case) */
if (pte_flags & VM_SWAPPED) {
BUG();
// Properly implement:
// fault->vma->owner->pager->ops.read_page(fault);
/* Map the page with right permission */
if (fault->reason & VM_READ)
l4_map(paddr, (void *)page_align(fault->address), 1,
MAP_USR_RO_FLAGS, fault->task->tid);
else if (fault->reason & VM_WRITE)
l4_map(paddr, (void *)page_align(fault->address), 1,
MAP_USR_RW_FLAGS, fault->task->tid);
else
BUG();
return 0;
}
/* For non-existant pages just map the zero page, unless it is the
* beginning of stack which requires environment and argument data. */
if (fault->reason & VM_READ) {
/*
* Zero page is a special wired-in page that is mapped
* many times in many tasks. Just update its count field.
*/
paddr = get_zero_page();
l4_map(paddr, (void *)page_align(fault->address), 1,
MAP_USR_RO_FLAGS, fault->task->tid);
}
/* Write faults require a real zero initialised page */
if (fault->reason & VM_WRITE) {
paddr = alloc_page(1);
vaddr = phys_to_virt(paddr);
page = phys_to_page(paddr);
/* NOTE:
* This mapping overwrites the original RO mapping which
* is anticipated to be the zero page.
*/
BUG_ON(__pte_to_addr(fault->kdata->pte) !=
(unsigned long)get_zero_page());
/* Map new page at a self virtual address temporarily */
l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
/* Clear the page */
memset((void *)vaddr, 0, PAGE_SIZE);
/* Remove temporary mapping */
l4_unmap((void *)vaddr, 1, self_tid());
/* Map the page to task */
l4_map(paddr, (void *)page_align(fault->address), 1,
MAP_USR_RW_FLAGS, fault->task->tid);
/*** DEBUG CODE FOR FS0 UTCB ***/
if(page_align(fault->address) == 0xf8001000) {
printf("For FS0 utcb @ 0xf8001000, mapping page @ 0x%x, foffset: 0x%x, owned by vma @ 0x%x, vmfile @ 0x%x\n",
(unsigned long)page, page->f_offset, fault->vma, fault->vma->owner);
}
if(page_align(fault->address) == 0xf8002000) {
printf("For FS0 utcb @ 0xf8002000, mapping page @ 0x%x, foffset: 0x%x, owned by vma @ 0x%x, vmfile @ 0x%x\n",
(unsigned long)page, page->f_offset, fault->vma, fault->vma->owner);
}
/*** DEBUG CODE FOR FS0 UTCB ***/
spin_lock(&page->lock);
/* vma's swap file owns this page */
page->owner = fault->vma->owner;
/* Add the page to it's owner's list of in-memory pages */
BUG_ON(!list_empty(&page->list));
insert_page_olist(page, page->owner);
/* The offset of this page in its owner file */
page->f_offset = __pfn(fault->address)
- fault->vma->pfn_start + fault->vma->f_offset;
page->count++;
page->virtual = page_align(fault->address);
spin_unlock(&page->lock);
}
return 0;
}
#endif
/*
* Page fault model:
*

View File

@@ -62,6 +62,8 @@ LIST_HEAD(vm_object_list);
struct vm_object *vm_object_init(struct vm_object *obj)
{
INIT_LIST_HEAD(&obj->list);
INIT_LIST_HEAD(&obj->shref);
INIT_LIST_HEAD(&obj->shadowers);
INIT_LIST_HEAD(&obj->page_cache);
return obj;