mirror of https://github.com/drasko/codezero.git
synced 2026-01-13 19:33:15 +01:00

Tasks boot fine up to doing ipc using their utcbs.

UTCB PLAN:
- Push ipc registers into a private environment instead of a shared utcb, but map in a shared utcb for passing long data to server tasks.
- The shared utcb has a unique virtual address for every thread.
- A forked child inherits its parent's utcb but cannot use it to communicate with any server; it must explicitly obtain its own utcb for that.
- Clone could take a flag to explicitly not inherit the parent's utcb, which is the right thing to do.
- MM0 serves a syscall for a task to obtain its own utcb.
- With this method, forked tasks need not map in a utcb unless they want to pass long data.
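To illustrate the last two points, a freshly forked task would explicitly ask MM0 for its own utcb before doing ipc with any server. A minimal sketch, assuming a hypothetical l4_mm0_request_utcb() wrapper (not the actual Codezero API):

	/* Hypothetical post-fork setup: obtain a private utcb from MM0.
	 * All names here are illustrative, not real Codezero calls. */
	void *task_obtain_utcb(void)
	{
		/* The inherited parent utcb must not be used for server
		 * ipc, so ask the pager for a fresh, per-thread mapping. */
		void *utcb = l4_mm0_request_utcb();

		if (!utcb)
			return 0;	/* MM0 could not map in a utcb */
		return utcb;
	}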
/*
 * Page fault handling.
 *
 * Copyright (C) 2007, 2008 Bahadir Balban
 */
#include <vm_area.h>
#include <task.h>
#include <mm/alloc_page.h>
#include <kmalloc/kmalloc.h>
#include <l4lib/arch/syscalls.h>
#include <l4lib/arch/syslib.h>
#include INC_GLUE(memory.h)
#include INC_SUBARCH(mm.h)
#include <arch/mm.h>
#include <l4/generic/space.h>
#include <l4/api/errno.h>
#include <string.h>
#include <memory.h>
#include <shm.h>
#include <file.h>

unsigned long fault_to_file_offset(struct fault_data *fault)
{
	/* Fault's offset in its vma */
	unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;

	/* Fault's offset in the file */
	unsigned long f_off_pfn = fault->vma->file_offset + vma_off_pfn;

	return f_off_pfn;
}
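
/*
 * Worked example: for a vma with pfn_start == 0x1000 and
 * file_offset == 4 (both counted in pages), a fault at pfn 0x1003
 * falls on page 3 of the vma, hence on page 4 + 3 = 7 of the
 * backing file.
 */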

/*
 * Given a reference to a vm_object link, returns the next link but
 * avoids wrapping around back to head. If next is head, returns 0.
 *
 * vma->link1->link2->link3
 *       |      |      |
 *       V      V      V
 *      vmo1   vmo2   vmo3|vm_file
 *
 * Example:
 * Given a reference to link = vma, head = vma, returns link1.
 * Given a reference to link = link3, head = vma, returns 0.
 */
struct vm_obj_link *vma_next_link(struct list_head *link,
				  struct list_head *head)
{
	BUG_ON(list_empty(link));
	if (link->next == head)
		return 0;
	else
		return list_entry(link->next, struct vm_obj_link, list);
}
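
/*
 * The usual traversal idiom over a vma's object chain (this is the
 * pattern copy_on_write() and __do_page_fault() use below):
 *
 *	link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list);
 *	while (link) {
 *		... examine link->obj ...
 *		link = vma_next_link(&link->list, &vma->vm_obj_list);
 *	}
 */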

/* Unlinks orig_link from its vma and deletes it, but keeps the object. */
int vma_drop_link(struct vm_obj_link *shadower_link,
		  struct vm_obj_link *orig_link)
{
	/* Remove object link from vma's list */
	list_del(&orig_link->list);

	/* Reduce object's ref count */
	orig_link->obj->refcnt--;

	/*
	 * Refcount can go as low as 1 but not zero: shortly after
	 * it drops to one, the object is removed from the link
	 * chain, so it can never sit in the chain with a refcount
	 * below 1.
	 */
	if (orig_link->obj->refcnt < 1) {
		printf("%s: Shadower:\n", __FUNCTION__);
		vm_object_print(shadower_link->obj);

		printf("%s: Original:\n", __FUNCTION__);
		vm_object_print(orig_link->obj);
		BUG();
	}

	/*
	 * Remove the shadower from the original's shadower list.
	 * We know the shadower comes off the original's list
	 * because each shadow shadows a single object.
	 */
	list_del(&shadower_link->shref);

	/* Delete the original link */
	kfree(orig_link);

	return 0;
}
/*
 * Checks whether the page cache pages of lesser are a subset of those
 * of copier. Note this only checks the page cache, so if either object
 * has pages swapped out to disk, this function is not conclusive.
 */
int vm_object_is_subset(struct vm_object *copier,
			struct vm_object *lesser)
{
	struct page *pc, *pl;

	/* Copier must have at least as many pages as lesser to cover it */
	if (copier->npages < lesser->npages)
		return 0;

	/*
	 * Do a page-by-page comparison. Every lesser page
	 * must be present in copier.
	 */
	list_for_each_entry(pl, &lesser->page_cache, list)
		if (!(pc = find_page(copier, pl->offset)))
			return 0;

	/*
	 * Every page of the lesser object has a counterpart in the
	 * copier, so lesser is a subset of copier.
	 */
	return 1;
}
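
/*
 * Example: if the copier caches pages at offsets {0, 1, 2} and the
 * lesser object only at {1, 2}, every lesser page has a counterpart
 * in the copier, so the lesser object is a collapsible subset.
 */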

/* Merges link1's object into link2's, then frees link1 and its object. */
int vma_do_merge_link(struct vm_obj_link *link1, struct vm_obj_link *link2)
{
	struct vm_object *obj1 = link1->obj;
	struct vm_object *obj2 = link2->obj;
	struct page *p1, *p2, *tmp;

	/*
	 * Move all non-intersecting pages to link2. The safe iterator
	 * is needed since pages are deleted from the list mid-walk.
	 */
	list_for_each_entry_safe(p1, tmp, &obj1->page_cache, list) {
		/* Page doesn't exist in obj2, move it to the shadower */
		if (!(p2 = find_page(obj2, p1->offset))) {
			list_del(&p1->list);
			spin_lock(&p1->lock);
			p1->owner = obj2;
			spin_unlock(&p1->lock);
			insert_page_olist(p1, obj2);
			obj2->npages++;
		}
	}
	/* Delete the object along with all its remaining pages. */
	vm_object_delete(obj1);

	/* Delete the last link for the object */
	kfree(link1);

	return 0;
}
/*
 * Finds the only shadower of a vm object, finds the link to
 * the object that is in the same vma as the shadower, and
 * merges the two into the shadower object, freeing the link and
 * the original object. Note this must only be called once a
 * merge has been decided on.
 */
int vma_merge_link(struct vm_object *vmo)
{
	struct vm_obj_link *sh_link, *vmo_link;

	/* Check refcount */
	BUG_ON(vmo->refcnt != 1);

	/* Get the last shadower entry */
	sh_link = list_entry(vmo->shadowers.next,
			     struct vm_obj_link, shref);

	/* Remove it from the original's shadower list */
	list_del(&sh_link->shref);

	/* Check that there really was only one shadower left */
	BUG_ON(!list_empty(&vmo->shadowers));

	/*
	 * Get the link to vmo that is in the same list as
	 * the only shadower
	 */
	vmo_link = list_entry(sh_link->list.next,
			      struct vm_obj_link, list);

	/*
	 * Check that we got the right link. Since it is
	 * an ordered list, the link must be the entry
	 * immediately after its shadower.
	 */
	BUG_ON(vmo_link->obj != vmo);

	/*
	 * Now that we have the consecutive links in the
	 * same vma, do the actual merge.
	 */
	vma_do_merge_link(vmo_link, sh_link);

	return 0;
}
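
/*
 * Merge, pictorially:
 *
 *	Before:  vma->sh_link->vmo_link    After:  vma->sh_link
 *	              |          |                      |
 *	              V          V                      V
 *	          shadower      vmo                 shadower
 *	                                    (plus vmo's unique pages)
 */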

struct vm_obj_link *vm_objlink_create(void)
{
	struct vm_obj_link *vmo_link;

	if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&vmo_link->list);
	INIT_LIST_HEAD(&vmo_link->shref);

	return vmo_link;
}

/*
 * Creates a bare vm_object along with its vma link, since
 * the shadow will be immediately used in a vma object list.
 */
struct vm_obj_link *vma_create_shadow(void)
{
	struct vm_object *vmo;
	struct vm_obj_link *vmo_link;

	if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
		return 0;

	if (!(vmo = vm_object_create())) {
		kfree(vmo_link);
		return 0;
	}
	INIT_LIST_HEAD(&vmo_link->list);
	INIT_LIST_HEAD(&vmo_link->shref);
	vmo->flags = VM_OBJ_SHADOW;
	vmo_link->obj = vmo;

	return vmo_link;
}

/* Allocates a new page, copies the original onto it and returns it. */
struct page *copy_page(struct page *orig)
{
	void *new_vaddr, *vaddr, *paddr;
	struct page *new;

	if (!(paddr = alloc_page(1)))
		return 0;

	new = phys_to_page(paddr);

	/* Map the new and orig pages to self */
	new_vaddr = l4_map_helper(paddr, 1);
	vaddr = l4_map_helper((void *)page_to_phys(orig), 1);

	/* Copy the original page into the new page */
	memcpy(new_vaddr, vaddr, PAGE_SIZE);

	/* Unmap both pages from the current task. */
	l4_unmap_helper(vaddr, 1);
	l4_unmap_helper(new_vaddr, 1);

	return new;
}

/* TODO:
 * - Why not allocate a swap descriptor in vma_create_shadow() rather than
 *   a bare vm_object? It will be needed.
 * - Does VM_WRITE clash with any other object flags?
 * - Check refcounting of shadows, their references, page refs,
 *   decrements, increments etc.
 */
int copy_on_write(struct fault_data *fault)
{
	struct vm_obj_link *vmo_link, *shadow_link, *copier_link;
	struct vm_object *vmo, *shadow;
	struct page *page, *new_page;
	struct vm_area *vma = fault->vma;
	unsigned int reason = fault->reason;
	unsigned long file_offset = fault_to_file_offset(fault);

	/* Get the first object, either the original file or a shadow */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/*
	 * Is the object read-only? Create a shadow object if so.
	 *
	 * NOTE: Whenever the topmost object is read-only, a new shadow
	 * object must be created. When there are no shadows, one is created
	 * because it's the original vm_object that is not writeable; and
	 * when there are shadows, one is created because a fork has just
	 * happened, in which case all shadows are rendered read-only.
	 */
	if (!(vmo_link->obj->flags & VM_WRITE)) {
		if (!(shadow_link = vma_create_shadow()))
			return -ENOMEM;
		dprintf("%s: Created a shadow.\n", __TASKNAME__);

		/* Initialise the shadow */
		shadow = shadow_link->obj;
		shadow->refcnt = 1;
		shadow->orig_obj = vmo_link->obj;
		shadow->flags = VM_OBJ_SHADOW | VM_WRITE;
		shadow->pager = &swap_pager;

		/*
		 * Add the shadow in front of the original:
		 *
		 * vma->link0->link1
		 *       |      |
		 *       V      V
		 *    shadow  original
		 */
		list_add(&shadow_link->list, &vma->vm_obj_list);

		/* Add to global object list */
		list_add(&shadow->list, &vm_object_list);

		/* Shadow is the copier object */
		copier_link = shadow_link;
	} else {
		dprintf("No new shadow. Adding to the topmost r/w shadow object.\n");
		/* No new shadows; the topmost r/w vmo is the copier object */
		copier_link = vmo_link;

		/*
		 * We start the page search on read-only objects. Since the
		 * first one was writable, go to the next, which must be
		 * read-only.
		 */
		BUG_ON(!(vmo_link = vma_next_link(&vmo_link->list,
						  &vma->vm_obj_list)));
		BUG_ON(vmo_link->obj->flags & VM_WRITE);
	}

	/* Traverse the list of read-only vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}

	/*
	 * Copy the page. This traverse-and-copy is like a page-in operation
	 * of a pager, except that the page is moving along vm_objects.
	 */
	new_page = copy_page(page);
	BUG_ON(!new_page);

	/* Update page details */
	spin_lock(&new_page->lock);
	new_page->refcnt = 0;
	new_page->owner = copier_link->obj;
	new_page->offset = file_offset;
	new_page->virtual = 0;
	BUG_ON(!list_empty(&new_page->list));
	spin_unlock(&new_page->lock);

	/* Add the page to its owner's list of in-memory pages */
	insert_page_olist(new_page, new_page->owner);
	new_page->owner->npages++;

	/* Map the new page to the faulty task */
	l4_map((void *)page_to_phys(new_page),
	       (void *)page_align(fault->address), 1,
	       (reason & VM_READ) ? MAP_USR_RO_FLAGS : MAP_USR_RW_FLAGS,
	       fault->task->tid);
	dprintf("%s: Mapped 0x%x as writable to tid %d.\n", __TASKNAME__,
		page_align(fault->address), fault->task->tid);
	vm_object_print(new_page->owner);

	/*
	 * Finished handling the actual fault; now check for possible
	 * shadow collapses. Does the copier completely shadow the one
	 * underlying it?
	 */
	if (!(vmo_link = vma_next_link(&copier_link->list, &vma->vm_obj_list))) {
		/* Copier must have an object under it */
		printf("Copier must have had an object under it!\n");
		BUG();
	}

	/* Check whether the page caches overlap */
	if (vm_object_is_subset(copier_link->obj, vmo_link->obj)) {
		/*
		 * They do overlap, so keep a reference to the object but
		 * drop and delete the vma link.
		 */
		vmo = vmo_link->obj;
		vma_drop_link(copier_link, vmo_link);
		vmo_link = 0;

		/* vm object reference down to one and object is mergeable? */
		if ((vmo->refcnt == 1) &&
		    !(vmo->flags & VM_OBJ_FILE))
			vma_merge_link(vmo);
	}

	return 0;
}
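
/*
 * Copy-on-write, pictorially. A write fault on a read-only topmost
 * object grows the chain by one r/w shadow and copies the faulted
 * page into it:
 *
 *	Before:  vma->link0          After:  vma->link0-->link1
 *	              |                           |        |
 *	              V                           V        V
 *	        original (RO)              shadow (RW)  original (RO)
 *	                              [holds copy of faulted page]
 */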

/*
 * Handles the page fault. All entries here are assumed to be *legal*
 * faults, i.e. do_page_fault() should have already checked for illegal
 * accesses.
 */
int __do_page_fault(struct fault_data *fault)
{
	unsigned int reason = fault->reason;
	unsigned int vma_flags = fault->vma->flags;
	unsigned int pte_flags = vm_prot_flags(fault->kdata->pte);
	struct vm_area *vma = fault->vma;
	unsigned long file_offset;
	struct vm_obj_link *vmo_link;
	struct page *page;

	/* Handle read */
	if ((reason & VM_READ) && (pte_flags & VM_NONE)) {
		file_offset = fault_to_file_offset(fault);

		/* Get the first object, either the original file or a shadow */
		if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
			printf("%s:%s: No vm object in vma!\n",
			       __TASKNAME__, __FUNCTION__);
			BUG();
		}

		/* Traverse the list of read-only vm objects and search for the page */
		while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
								       file_offset))) {
			if (!(vmo_link = vma_next_link(&vmo_link->list,
						       &vma->vm_obj_list))) {
				printf("%s:%s: Traversed all shadows and the original "
				       "file's vm_object, but could not find the "
				       "faulty page in this vma.\n", __TASKNAME__,
				       __FUNCTION__);
				BUG();
			}
		}
		BUG_ON(!page);

		/* Map it to the faulty task */
		l4_map((void *)page_to_phys(page),
		       (void *)page_align(fault->address), 1,
		       (reason & VM_READ) ? MAP_USR_RO_FLAGS : MAP_USR_RW_FLAGS,
		       fault->task->tid);
		dprintf("%s: Mapped 0x%x as readable to tid %d.\n", __TASKNAME__,
			page_align(fault->address), fault->task->tid);
		vm_object_print(vmo_link->obj);
	}

	/* Handle write */
	if ((reason & VM_WRITE) && (pte_flags & VM_READ)) {
		/* Copy-on-write */
		if (vma_flags & VMA_PRIVATE)
			copy_on_write(fault);

		/* Regular files */
		if ((vma_flags & VMA_SHARED) && !(vma_flags & VMA_ANONYMOUS)) {
			/* No regular files are mapped yet */
			BUG();
			file_offset = fault_to_file_offset(fault);
			BUG_ON(!(vmo_link = vma_next_link(&vma->vm_obj_list,
							  &vma->vm_obj_list)));

			/* Get the page from its pager */
			if (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
									    file_offset))) {
				printf("%s: Could not obtain faulty page.\n",
				       __TASKNAME__);
				BUG();
			}
			BUG_ON(!page);

			/* Map it to the faulty task */
			l4_map((void *)page_to_phys(page),
			       (void *)page_align(fault->address), 1,
			       (reason & VM_READ) ? MAP_USR_RO_FLAGS : MAP_USR_RW_FLAGS,
			       fault->task->tid);
			dprintf("%s: Mapped 0x%x as writable to tid %d.\n", __TASKNAME__,
				page_align(fault->address), fault->task->tid);
			vm_object_print(vmo_link->obj);
		}
		/* FIXME: Just do fs files for now, anon shm objects later. */
		/* Things to think about:
		 * - Is the utcb really shm memory? If so, each task must map
		 *   it in via shmget(). FS0 must map all user tasks' utcbs via
		 *   shmget() as well, e.g. to pass on pathnames.
		 */
		BUG_ON((vma_flags & VMA_SHARED) && (vma_flags & VMA_ANONYMOUS));
	}

	return 0;
}

#if 0
/*
 * Old function, likely to be ditched.
 *
 * For copy-on-write vmas, grows an existing shadow vma, or creates a new one
 * for the copy-on-write'd page. Then adds this shadow vma to the actual vma's
 * shadow list. Shadow vmas never overlap with each other, and always overlap
 * with part of their original vma.
 */
struct vm_area *copy_on_write_vma(struct fault_data *fault)
{
	struct vm_area *shadow;
	unsigned long faulty_pfn = __pfn(fault->address);

	BUG_ON(faulty_pfn < fault->vma->pfn_start ||
	       faulty_pfn >= fault->vma->pfn_end);
	list_for_each_entry(shadow, &fault->vma->shadow_list, shadow_list) {
		if (faulty_pfn == (shadow->pfn_start - 1)) {
			/* Growing start of existing shadow vma */
			shadow->pfn_start = faulty_pfn;
			shadow->f_offset -= 1;
			return shadow;
		} else if (faulty_pfn == (shadow->pfn_end + 1)) {
			/* Growing end of existing shadow vma */
			shadow->pfn_end = faulty_pfn;
			return shadow;
		}
	}
	/* Otherwise this is a new shadow vma that must be initialised */
	shadow = kzalloc(sizeof(struct vm_area));
	BUG(); /* This f_offset is wrong: it uses uninitialised fields, and
		  besides, swap offsets are calculated differently. */
	shadow->f_offset = faulty_pfn - shadow->pfn_start
			   + shadow->f_offset;
	shadow->pfn_start = faulty_pfn;
	shadow->pfn_end = faulty_pfn + 1; /* End pfn is exclusive */
	shadow->flags = fault->vma->flags;

	/* The vma is owned by the swap file, since it's a private vma */
	shadow->owner = fault->task->swap_file;
	INIT_LIST_HEAD(&shadow->list);
	INIT_LIST_HEAD(&shadow->shadow_list);

	/*
	 * The actual vma uses its shadow_list as the list head for shadows.
	 * The shadows use their list member, and shadow_list is unused.
	 */
	list_add(&shadow->list, &fault->vma->shadow_list);
	return shadow;
}

/*
 * Handles any page ownership change or allocation for file-backed pages.
 */
int do_file_page(struct fault_data *fault)
{
	unsigned int reason = fault->reason;
	unsigned int vma_flags = fault->vma->flags;
	unsigned int pte_flags = vm_prot_flags(fault->kdata->pte);

	/* For RO or non-cow WR pages just read in the page */
	if (((reason & VM_READ) || ((reason & VM_WRITE) && !(vma_flags & VMA_COW)))
	    && (pte_flags & VM_NONE)) {
		/* Allocate a new page */
		void *paddr = alloc_page(1);
		void *vaddr = phys_to_virt(paddr);
		struct page *page = phys_to_page(paddr);
		unsigned long f_offset = fault_to_file_offset(fault);

		/* Map the new page at a self virtual address temporarily */
		l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());

		/*
		 * Read the page. (Simply read into the faulty area that's
		 * now mapped using a newly allocated page.)
		 */
		if (fault->vma->owner->pager->ops.read_page(fault->vma->owner,
							    f_offset,
							    vaddr) < 0)
			BUG();

		/* Remove the temporary mapping */
		l4_unmap(vaddr, 1, self_tid());

		/* Map it to the task. */
		l4_map(paddr, (void *)page_align(fault->address), 1,
		       (reason & VM_READ) ? MAP_USR_RO_FLAGS : MAP_USR_RW_FLAGS,
		       fault->task->tid);

		spin_lock(&page->lock);

		/* Update its page descriptor */
		page->count++;
		page->owner = fault->vma->owner;
		page->f_offset = __pfn(fault->address)
				 - fault->vma->pfn_start + fault->vma->f_offset;
		page->virtual = page_align(fault->address);

		/* Add the page to its owner's list of in-memory pages */
		BUG_ON(!list_empty(&page->list));
		insert_page_olist(page, page->owner);
		spin_unlock(&page->lock);
		//printf("%s: Mapped new page @ 0x%x to task: %d\n", __TASKNAME__,
		//	 fault->address, fault->task->tid);

	/* Upgrade an RO page to non-cow write */
	} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)
		   && !(vma_flags & VMA_COW)) {
		/* The page is mapped in, just update its permission */
		l4_map((void *)__pte_to_addr(fault->kdata->pte),
		       (void *)page_align(fault->address), 1,
		       MAP_USR_RW_FLAGS, fault->task->tid);

	/*
	 * For cow-write, allocate private pages and create shadow vmas.
	 */
	} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)
		   && (vma_flags & VMA_COW)) {
		void *pa = (void *)__pte_to_addr(fault->kdata->pte);
		void *new_pa = alloc_page(1);
		struct page *page = phys_to_page(pa);
		struct page *new_page = phys_to_page(new_pa);
		void *va, *new_va;

		/* Create or obtain an existing shadow vma for the page */
		struct vm_area *shadow = copy_on_write_vma(fault);

		/* Map the new page at a local virtual address temporarily */
		new_va = l4_map_helper(new_pa, 1);

		/* Map the old page (vmapped for the process but not us) to self */
		va = l4_map_helper(pa, 1);

		/* Copy data from the old page to the new one */
		memcpy(new_va, va, PAGE_SIZE);

		/* Remove the temporary mappings */
		l4_unmap(va, 1, self_tid());
		l4_unmap(new_va, 1, self_tid());

		spin_lock(&page->lock);

		/* Clear usage details for the original page. */
		page->count--;
		page->virtual = 0; /* FIXME: Maybe mapped for multiple processes? */

		/* The new page is owned by the shadow's owner (swap) */
		new_page->owner = shadow->owner;
		new_page->count++;
		new_page->f_offset = __pfn(fault->address)
				     - shadow->pfn_start + shadow->f_offset;
		new_page->virtual = page_align(fault->address);

		/* Add the page to its owner's list of in-memory pages */
		BUG_ON(!list_empty(&page->list));
		insert_page_olist(page, page->owner);
		spin_unlock(&page->lock);

		/*
		 * Overwrite the original file-backed page's mapping in this
		 * task with the writeable private page. The original physical
		 * page still exists in memory and can be referenced through
		 * its associated owner file, but it is no longer mapped at
		 * any virtual address in this task.
		 */
		l4_map(new_pa, (void *)page_align(fault->address), 1,
		       MAP_USR_RW_FLAGS, fault->task->tid);

	} else if ((reason & VM_WRITE) && (pte_flags & VM_NONE)
		   && (vma_flags & VMA_COW)) {
		struct vm_area *shadow;

		/* Allocate a new page */
		void *paddr = alloc_page(1);
		void *vaddr = phys_to_virt(paddr);
		struct page *page = phys_to_page(paddr);
		unsigned long f_offset = fault_to_file_offset(fault);

		/* Map it to self */
		l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());

		/* Update its page descriptor */
		page->count++;
		page->owner = fault->vma->owner;
		page->f_offset = __pfn(fault->address)
				 - fault->vma->pfn_start + fault->vma->f_offset;
		page->virtual = page_align(fault->address);

		/*
		 * Read the page. (Simply read into the faulty area that's
		 * now mapped using a newly allocated page.)
		 */
		if (fault->vma->owner->pager->ops.read_page(fault->vma->owner,
							    f_offset,
							    vaddr) < 0)
			BUG();

		/* Unmap from self */
		l4_unmap(vaddr, 1, self_tid());

		/* Map to the task. */
		l4_map(paddr, (void *)page_align(fault->address), 1,
		       MAP_USR_RW_FLAGS, fault->task->tid);

		/* Obtain a shadow vma for the page */
		shadow = copy_on_write_vma(fault);
		spin_lock(&page->lock);

		/* Now anonymise the page by changing its owner file to swap */
		page->owner = shadow->owner;

		/* The page's offset is different in its new owner. */
		page->f_offset = __pfn(fault->address)
				 - fault->vma->pfn_start + fault->vma->f_offset;

		/* Add the page to its owner's list of in-memory pages */
		BUG_ON(!list_empty(&page->list));
		insert_page_olist(page, page->owner);
		spin_unlock(&page->lock);
	} else
		BUG();

	return 0;
}
/*
 * Handles any page allocation or file ownership change for anonymous pages.
 * For read accesses it maps in a wired-in zero page, and for write accesses
 * it initialises a private zero-initialised page, giving its ownership to
 * the swap file.
 */
int do_anon_page(struct fault_data *fault)
{
	unsigned int pte_flags = vm_prot_flags(fault->kdata->pte);
	void *paddr, *vaddr;
	struct page *page;

	/* If swapped, read in with the vma's pager (swap, in the anon case) */
	if (pte_flags & VM_SWAPPED) {
		BUG();
		// Properly implement:
		// fault->vma->owner->pager->ops.read_page(fault);

		/* Map the page with the right permission */
		if (fault->reason & VM_READ)
			l4_map(paddr, (void *)page_align(fault->address), 1,
			       MAP_USR_RO_FLAGS, fault->task->tid);
		else if (fault->reason & VM_WRITE)
			l4_map(paddr, (void *)page_align(fault->address), 1,
			       MAP_USR_RW_FLAGS, fault->task->tid);
		else
			BUG();
		return 0;
	}

	/*
	 * For non-existent pages just map the zero page, unless it is the
	 * beginning of stack, which requires environment and argument data.
	 */
	if (fault->reason & VM_READ) {
		/*
		 * The zero page is a special wired-in page that is mapped
		 * many times in many tasks. Just update its count field.
		 */
		paddr = get_zero_page();

		l4_map(paddr, (void *)page_align(fault->address), 1,
		       MAP_USR_RO_FLAGS, fault->task->tid);
	}

	/* Write faults require a real zero-initialised page */
	if (fault->reason & VM_WRITE) {
		paddr = alloc_page(1);
		vaddr = phys_to_virt(paddr);
		page = phys_to_page(paddr);

		/* NOTE:
		 * This mapping overwrites the original RO mapping, which
		 * is anticipated to be the zero page.
		 */
		BUG_ON(__pte_to_addr(fault->kdata->pte) !=
		       (unsigned long)get_zero_page());

		/* Map the new page at a self virtual address temporarily */
		l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());

		/* Clear the page */
		memset((void *)vaddr, 0, PAGE_SIZE);

		/* Remove the temporary mapping */
		l4_unmap((void *)vaddr, 1, self_tid());

		/* Map the page to the task */
		l4_map(paddr, (void *)page_align(fault->address), 1,
		       MAP_USR_RW_FLAGS, fault->task->tid);

		/*** DEBUG CODE FOR FS0 UTCB ***/
		if (page_align(fault->address) == 0xf8001000) {
			printf("For FS0 utcb @ 0xf8001000, mapping page @ 0x%x, foffset: 0x%x, owned by vma @ 0x%x, vmfile @ 0x%x\n",
			       (unsigned long)page, page->f_offset, fault->vma, fault->vma->owner);
		}
		if (page_align(fault->address) == 0xf8002000) {
			printf("For FS0 utcb @ 0xf8002000, mapping page @ 0x%x, foffset: 0x%x, owned by vma @ 0x%x, vmfile @ 0x%x\n",
			       (unsigned long)page, page->f_offset, fault->vma, fault->vma->owner);
		}
		/*** DEBUG CODE FOR FS0 UTCB ***/

		spin_lock(&page->lock);
		/* The vma's swap file owns this page */
		page->owner = fault->vma->owner;

		/* Add the page to its owner's list of in-memory pages */
		BUG_ON(!list_empty(&page->list));
		insert_page_olist(page, page->owner);

		/* The offset of this page in its owner file */
		page->f_offset = __pfn(fault->address)
				 - fault->vma->pfn_start + fault->vma->f_offset;
		page->count++;
		page->virtual = page_align(fault->address);
		spin_unlock(&page->lock);
	}
	return 0;
}
#endif

/*
 * Page fault model:
 *
 * A page is anonymous (e.g. stack)
 *  - page needs read access:
 *	action: map the zero page.
 *  - page needs write access:
 *	action: allocate a ZI page and map that. The swap file owns the page.
 *  - page is swapped out to swap:
 *	action: read it back from the swap file into a new page.
 *
 * A page is file-backed but private (e.g. .data section)
 *  - page needs read access:
 *	action: read the page from its file.
 *  - page is swapped out before becoming private (i.e. invalidated):
 *	action: read the page from its file. (original file)
 *  - page is swapped out after becoming private:
 *	action: read the page from its file. (swap file)
 *  - page needs write access:
 *	action: allocate a new page, declare the page as private, change
 *	its owner to the swap file.
 *
 * A page is file-backed, not private, and read-only (e.g. .text section)
 *  - page needs read access:
 *	action: read the page in from its file.
 *  - page is swapped out (i.e. invalidated):
 *	action: read the page in from its file.
 *  - page needs write access:
 *	action: forbidden, kill the task?
 *
 * A page is file-backed, not private, and read/write (e.g. any data file)
 *  - page needs read access:
 *	action: read the page in from its file.
 *  - page is flushed back to its original file (i.e. instead of swap):
 *	action: read the page in from its file.
 *  - page needs write access:
 *	action: read the page in, give write access.
 */
int do_page_fault(struct fault_data *fault)
{
	unsigned int vma_flags = (fault->vma) ? fault->vma->flags : VM_NONE;
	unsigned int reason = fault->reason;
	int err;

	/* vma flags show no access */
	if (vma_flags & VM_NONE) {
		printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
		       fault->task->tid, fault->address, fault->kdata->faulty_pc);
		BUG();
	}

	/* The access reason is not included in the vma's listed flags */
	if (!(reason & vma_flags)) {
		printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
		       fault->task->tid, fault->address, fault->kdata->faulty_pc);
		BUG();
	}

	if ((reason & VM_EXEC) && (vma_flags & VM_EXEC)) {
		printf("Exec faults unsupported yet.\n");
		BUG(); /* Can't handle this yet. */
	}

	/* Handle legitimate faults */
	err = __do_page_fault(fault);

	/* Return the ipc, and by doing so restart the faulty thread */
	l4_ipc_return(err);
	return 0;
}
void page_fault_handler(l4id_t sender, fault_kdata_t *fkdata)
{
	struct fault_data fault = {
		/* Fault data from the kernel */
		.kdata = fkdata,
	};

	BUG_ON(sender == 0);

	/* Get pager-specific task info */
	BUG_ON(!(fault.task = find_task(sender)));

	/* Extract fault reason, fault address etc. in generic format */
	set_generic_fault_params(&fault);

	/* Get vma info */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_list)))
		printf("Hmm. No vma for faulty region. "
		       "Bad things will happen.\n");

	/* Handle the actual fault */
	do_page_fault(&fault);
}
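
/*
 * For context: page_fault_handler() is invoked from the pager's ipc
 * service loop when the kernel forwards a fault. A minimal sketch of
 * such a loop is below; the way the sender id and fault_kdata_t are
 * pulled out of the incoming message is an assumption here, not the
 * actual mm0 service loop:
 *
 *	for (;;) {
 *		l4id_t sender;
 *		fault_kdata_t fkdata;
 *
 *		// wait for the next ipc and fill in sender/fkdata
 *		// from the message registers (details omitted)
 *
 *		page_fault_handler(sender, &fkdata);
 *	}
 */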