/*
* Page fault handling.
*
* Copyright (C) 2007, 2008-2010 Bahadir Bilgehan Balban
*/
#include <vm_area.h>
#include <task.h>
#include <mem/alloc_page.h>
#include <mem/malloc.h>
#include <l4/generic/space.h>
#include <l4/api/errno.h>
#include <string.h>
#include <memory.h>
#include <shm.h>
#include <file.h>
#include <test.h>
#include L4LIB_INC_ARCH(syscalls.h)
#include L4LIB_INC_ARCH(syslib.h)
#include INC_GLUE(memory.h)
#include INC_SUBARCH(mm.h)
#include __INC_ARCH(mm.h)
#include __INC_ARCH(debug.h)
/* Given a page and the vma it is in, returns that page's virtual address */
unsigned long vma_page_to_virtual(struct vm_area *vma, struct page *page)
{
unsigned long virtual_pfn = vma->pfn_start + page->offset - vma->file_offset;
/* Page must be contained in vma's pages */
BUG_ON(vma->file_offset > page->offset);
return __pfn_to_addr(virtual_pfn);
}
unsigned long fault_to_file_offset(struct fault_data *fault)
{
/* Fault's offset in its vma */
unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;
/* Fault's offset in the file */
unsigned long f_off_pfn = fault->vma->file_offset + vma_off_pfn;
return f_off_pfn;
}
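/*
 * Worked example (illustrative numbers only, assuming 4K pages):
 * with vma->pfn_start = 0x100, vma->file_offset = 0x10 and a fault
 * at address 0x102000, __pfn(0x102000) = 0x102, so the offset into
 * the vma is 0x102 - 0x100 = 2 pages, and the returned file offset
 * is 0x10 + 2 = 0x12 pages into the mapped file.
 */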
/*
* Given a reference to a vm_object link, returns the next link but
* avoids wrapping around back to head. If next is head, returns 0.
*
* vma->link1->link2->link3
* | | |
* V V V
* vmo1 vmo2 vmo3|vm_file
*
* Example:
* Given a reference to link = vma, head = vma, returns link1.
* Given a reference to link = link3, head = vma, returns 0.
*/
struct vm_obj_link *vma_next_link(struct link *link,
struct link *head)
{
BUG_ON(list_empty(link));
if (link->next == head)
return 0;
else
return link_to_struct(link->next, struct vm_obj_link, list);
}
/* Unlinks the given link from its vma and deletes it, but keeps the object. */
struct vm_object *vma_drop_link(struct vm_obj_link *link)
{
struct vm_object *dropped;
/* Remove object link from vma's list */
list_remove(&link->list);
/* Unlink the link from object */
dropped = vm_unlink_object(link);
/* Delete the original link */
kfree(link);
return dropped;
}
/*
 * Checks whether the page cache pages of the original (the "lesser")
 * object are a subset of those of the shadow (the "copier").
*
* FIXME:
* Note this just checks the page cache, so if any objects have pages
* swapped to disk, this function won't work, which is a logic error.
* This should really count the swapped ones as well.
*/
int vm_object_is_subset(struct vm_object *shadow,
struct vm_object *original)
{
struct page *pc, *pl;
/* Copier must have equal or more pages to overlap lesser */
if (shadow->npages < original->npages)
return 0;
/*
* Do a page by page comparison. Every lesser page
* must be in copier for overlap.
*/
list_foreach_struct(pl, &original->page_cache, list)
if (!(pc = find_page(shadow, pl->offset)))
return 0;
/*
	 * Every page of the lesser vmo has a counterpart in the
	 * copier vmo, so the lesser is a subset of the copier.
*/
return 1;
}
static inline int vm_object_is_droppable(struct vm_object *shadow,
struct vm_object *original)
{
if (shadow->npages == original->npages &&
(original->flags & VM_OBJ_SHADOW))
return 1;
else
return 0;
}
/*
* vma_merge_object()
*
* FIXME: Currently this is an optimisation that needs to go
* away when swapping is available. We have this solely because
* currently a shadow needs to identically mirror the whole
 * object underneath it in order to drop it. A 1MB file would thus
 * consume up to 2MB of memory until dropped. When swapping is available,
* we will go back to identical mirroring instead of merging the
* last shadow, since most unused pages would be swapped out.
*/
/*
 * Merges a redundant shadow object into the shadow in front of it.
 * The caller must have already determined that the shadow is redundant.
*
* vma --> link1 --> link2 --> link3
* | | |
* v v v
* Front Redundant Next
* Shadow Shadow Object (E.g. shadow or file)
*/
int vma_merge_object(struct vm_object *redundant)
{
/* The redundant shadow object */
struct vm_object *front; /* Shadow in front of redundant */
struct vm_obj_link *last_link;
struct page *p1, *p2, *n;
	/* Check that the link and shadow counts are really 1 */
BUG_ON(redundant->nlinks != 1);
BUG_ON(redundant->shadows != 1);
/* Get the last shadower object in front */
front = link_to_struct(redundant->shdw_list.next,
struct vm_object, shref);
/* Move all non-intersecting pages to front shadow. */
list_foreach_removable_struct(p1, n, &redundant->page_cache, list) {
/* Page doesn't exist in front, move it there */
if (!(p2 = find_page(front, p1->offset))) {
list_remove_init(&p1->list);
spin_lock(&p1->lock);
p1->owner = front;
spin_unlock(&p1->lock);
insert_page_olist(p1, front);
front->npages++;
}
}
/* Sort out shadow relationships after the merge: */
/* Front won't be a shadow of the redundant shadow anymore */
list_remove_init(&front->shref);
/* Check that there really was one shadower of redundant left */
BUG_ON(!list_empty(&redundant->shdw_list));
/* Redundant won't be a shadow of its next object */
list_remove_init(&redundant->shref);
/* Front is now a shadow of redundant's next object */
list_insert(&front->shref, &redundant->orig_obj->shdw_list);
front->orig_obj = redundant->orig_obj;
/* Find last link for the object */
last_link = link_to_struct(redundant->link_list.next,
struct vm_obj_link, linkref);
/* Drop the last link to the object */
vma_drop_link(last_link);
/* Redundant shadow has no shadows anymore */
BUG_ON(--redundant->shadows < 0);
/* Delete the redundant shadow along with all its pages. */
vm_object_delete(redundant);
return 0;
}
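/*
 * Illustrative picture of the state after vma_merge_object(), following
 * the diagram above (link names are only for the example):
 *
 * vma --> link1 -----------> link3
 *           |                  |
 *           v                  v
 *         Front               Next
 *         Shadow              Object
 *
 * Front has absorbed every page of the redundant shadow that it did not
 * already hold, now shadows Next directly, and the redundant shadow has
 * been deleted together with its remaining (duplicate) pages.
 */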
struct vm_obj_link *vm_objlink_create(void)
{
struct vm_obj_link *vmo_link;
if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
return PTR_ERR(-ENOMEM);
link_init(&vmo_link->list);
link_init(&vmo_link->linkref);
return vmo_link;
}
/*
* Creates a bare vm_object along with its vma link, since
* the shadow will be immediately used in a vma object list.
*/
struct vm_obj_link *vma_create_shadow(void)
{
struct vm_object *vmo;
struct vm_obj_link *vmo_link;
if (IS_ERR(vmo_link = vm_objlink_create()))
return 0;
if (!(vmo = vm_object_create())) {
kfree(vmo_link);
return 0;
}
vmo->flags = VM_OBJ_SHADOW;
vm_link_object(vmo_link, vmo);
return vmo_link;
}
/* Allocates a new page, copies the original into it and returns the new page. */
struct page *copy_to_new_page(struct page *orig)
{
void *paddr = alloc_page(1);
BUG_ON(!paddr);
/* Copy the page into new page */
memcpy(phys_to_virt(paddr), page_to_virt(orig), PAGE_SIZE);
return phys_to_page(paddr);
}
/* Copy the whole stack of mapped object links from vma to new_vma */
int vma_copy_links(struct vm_area *new_vma, struct vm_area *vma)
{
struct vm_obj_link *vmo_link, *new_link;
/* Get the first object on the vma */
BUG_ON(list_empty(&vma->vm_obj_list));
vmo_link = link_to_struct(vma->vm_obj_list.next,
struct vm_obj_link, list);
do {
/* Create a new link */
new_link = vm_objlink_create();
/* Link object with new link */
vm_link_object(new_link, vmo_link->obj);
/* Add the new link to vma in object order */
list_insert_tail(&new_link->list, &new_vma->vm_obj_list);
/* Continue traversing links, doing the same copying */
} while((vmo_link = vma_next_link(&vmo_link->list,
&vma->vm_obj_list)));
return 0;
}
/*
* Determine if an object is deletable.
*
* Shadows are deleted if nlinks = 0, and
* merged if they have nlinks = 1, shadows = 1.
* See below for explanation.
*
* vfs-type vmfiles are deleted if their
* openers = 0, and their nlinks
* (i.e. mappers) = 0.
*
* shm-type vmfiles are deleted if their
* nlinks = 0, since they only have map count.
*/
int vm_object_is_deletable(struct vm_object *obj)
{
struct vm_file *f;
//printf("%s: Checking: ", __FUNCTION__);
//vm_object_print(obj);
if (obj->nlinks != 0)
return 0;
BUG_ON(obj->shadows != 0);
BUG_ON(!list_empty(&obj->shref));
if (obj->flags & VM_OBJ_SHADOW)
return 1;
f = vm_object_to_file(obj);
/* Devzero should probably never have 0 refs left */
if (f->type == VM_FILE_DEVZERO)
return 0;
else if (f->type == VM_FILE_SHM)
return 1;
else if (f->type == VM_FILE_VFS) {
if (f->openers == 0)
return 1;
else
return 0;
}
/* To make gcc happy */
BUG();
return 0;
}
/*
 * An exit-time drop has: !prev, and next may or may not exist.
 * A shadow drop (collapse) has: both prev and next.
*/
/*
 * Shadow drops: Dropping a link to a shadow does not mean the shadow's
 * next object has lost a shadow; there may be other links to both. But
 * when the shadow has dropped its last link and is about to be deleted,
 * it is then true that the next object has lost a shadow.
*/
int vma_drop_merge_delete(struct vm_area *vma, struct vm_obj_link *link)
{
struct vm_obj_link *prev, *next;
struct vm_object *obj;
/* Get previous and next links, if they exist */
prev = (link->list.prev == &vma->vm_obj_list) ? 0 :
link_to_struct(link->list.prev, struct vm_obj_link, list);
next = (link->list.next == &vma->vm_obj_list) ? 0 :
link_to_struct(link->list.next, struct vm_obj_link, list);
/* Drop the link */
obj = vma_drop_link(link);
/* If there is an object in front, this is a shadow drop */
if (prev) {
BUG_ON(!(prev->obj->flags & VM_OBJ_SHADOW));
BUG_ON(!(prev->obj->flags & VM_WRITE));
BUG_ON(--obj->shadows < 0);
// vm_object_print(obj);
/* Remove prev from current object's shadow list */
BUG_ON(list_empty(&prev->obj->shref));
list_remove_init(&prev->obj->shref);
/*
* We don't allow dropping non-shadow objects yet,
* (see ...is_droppable) so there must be a next.
*/
BUG_ON(!next);
/* prev is now shadow of next */
list_insert(&prev->obj->shref,
&next->obj->shdw_list);
prev->obj->orig_obj = next->obj;
/*
* No referrers left, meaning this object is not
* shadowing its original object anymore.
*/
if (obj->nlinks == 0) {
BUG_ON(obj->orig_obj != next->obj);
list_remove_init(&obj->shref);
} else {
/*
* Dropped object still has referrers, which
* means next has gained a new shadow.
* Here's why:
*
* T1 and T2: T2: drop-
* prev->drop->next \
* became: T1: prev--- next
*
* Now we have both prev and current object
* in next's shadow list.
*/
next->obj->shadows++;
}
	/* It's an exit; check whether there's a shadow loss */
} else {
if (obj->nlinks == 0) {
/* Is it a shadow delete? Sort out next */
if (next && obj->flags & VM_OBJ_SHADOW) {
BUG_ON(obj->orig_obj != next->obj);
BUG_ON(--next->obj->shadows < 0);
// vm_object_print(next->obj);
list_remove_init(&obj->shref);
}
}
}
/* Now deal with the object itself */
if (vm_object_is_deletable(obj)) {
dprintf("Deleting object:\n");
// vm_object_print(obj);
vm_object_delete(obj);
} else if ((obj->flags & VM_OBJ_SHADOW) &&
obj->nlinks == 1 && obj->shadows == 1) {
dprintf("Merging object:\n");
// vm_object_print(obj);
vma_merge_object(obj);
}
mm0_test_global_vm_integrity();
return 0;
}
/*
* A scenario that pretty much covers every exit() case.
*
* T = vma on a unique task
* l = link
* Sobj = Shadow object
* Fobj = File object
*
* Every l links to the object on the nearest
* row to it and on the same column.
*
* l l l l l l T
* Sobj Sobj
*
* Sobj Sobj Sobj Fobj
*
* Sobj Sobj Sobj
* l l l l l l l T
*
* l l l l l l l T
* Sobj
*
*/
/* This version is used when exiting. */
int vma_drop_merge_delete_all(struct vm_area *vma)
{
struct vm_obj_link *vmo_link, *n;
/* Vma cannot be empty */
BUG_ON(list_empty(&vma->vm_obj_list));
/* Traverse and get rid of all links */
list_foreach_removable_struct(vmo_link, n, &vma->vm_obj_list, list)
vma_drop_merge_delete(vma, vmo_link);
return 0;
}
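/*
 * Illustrative sketch, not part of the pager: a task-exit style teardown
 * would typically walk the task's vma list, tear down each vma's object
 * chain with vma_drop_merge_delete_all() and then free the vma itself.
 * The function name below is hypothetical, the vma list iteration mirrors
 * vm_freeze_shadows() further down, and unmapping of the task's pages is
 * left out entirely.
 */
#if 0
int task_release_vmas_sketch(struct tcb *task)
{
	struct vm_area *vma, *n;

	list_foreach_removable_struct(vma, n, &task->vm_area_head->list, list) {
		/* Drop, merge or delete every object linked into this vma */
		vma_drop_merge_delete_all(vma);

		/* Unlink and free the vma descriptor itself */
		list_remove(&vma->list);
		kfree(vma);
	}

	return 0;
}
#endif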
/* TODO:
* - Why not allocate a swap descriptor in vma_create_shadow() rather than
* a bare vm_object? It will be needed.
 * - Check refcounting of shadows, their references, page refs,
 *   decrements/increments, etc.
 *
 * This handles copy-on-write semantics in various situations. Returns
 * the page struct of the copied page, available for mapping.
*
* 1) Copy-on-write of read-only files. (Creates r/w shadows/adds pages)
* 2) Copy-on-write of forked RO shadows (Creates r/w shadows/adds pages)
* 3) Copy-on-write of shm files. (Adds pages to r/w shm file from devzero).
*/
struct page *copy_on_write(struct fault_data *fault)
{
struct vm_obj_link *vmo_link, *shadow_link;
struct vm_object *shadow;
struct page *page, *new_page;
struct vm_area *vma = fault->vma;
unsigned long file_offset = fault_to_file_offset(fault);
/* Get the first object, either original file or a shadow */
if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
printf("%s:%s: No vm object in vma!\n",
__TASKNAME__, __FUNCTION__);
BUG();
}
/* Is the object read-only? Create a shadow object if so.
*
* NOTE: Whenever the topmost object is read-only, a new shadow
	 * object must be created. When there are no shadows, one is created
	 * because it is the original vm_object that is not writeable; when
	 * there are shadows, one is created because a fork has just
	 * happened, in which case all shadows are rendered read-only.
*/
if (!(vmo_link->obj->flags & VM_WRITE)) {
if (!(shadow_link = vma_create_shadow()))
return PTR_ERR(-ENOMEM);
/* Initialise the shadow */
shadow = shadow_link->obj;
shadow->orig_obj = vmo_link->obj;
shadow->flags = VM_OBJ_SHADOW | VM_WRITE;
shadow->pager = &swap_pager;
vmo_link->obj->shadows++;
// vm_object_print(vmo_link->obj);
dprintf("%s: Created a shadow:\n", __TASKNAME__);
// vm_object_print(shadow);
dprintf("%s: Original object:\n", __TASKNAME__);
// vm_object_print(shadow->orig_obj);
/*
* Add the shadow in front of the original:
*
* vma->link0->link1
* | |
* v v
* shadow original
*/
list_insert(&shadow_link->list, &vma->vm_obj_list);
/* Add object to original's shadower list */
list_insert(&shadow->shref, &shadow->orig_obj->shdw_list);
/* Add to global object list */
global_add_vm_object(shadow);
} else {
		/* We ought to copy the missing RW page into the top shadow */
dprintf("No new shadows. Going to add to "
"topmost r/w shadow object\n");
shadow_link = vmo_link;
/*
* FIXME: Here we check for the case that a cloned thread is
* doing a duplicate write request on an existing RW shadow
* page. If so, we return the existing writable page in the top
* shadow. We should find a generic way to detect duplicate
* requests and cease IPC at an earlier stage.
*/
page = shadow_link->obj->pager->ops.page_in(shadow_link->obj,
file_offset);
if (!IS_ERR(page))
return page;
/*
* We start page search on read-only objects. If the first
* one was writable, go to next which must be read-only.
*/
BUG_ON(!(vmo_link = vma_next_link(&vmo_link->list,
&vma->vm_obj_list)));
BUG_ON(vmo_link->obj->flags & VM_WRITE);
}
/* Traverse the list of read-only vm objects and search for the page */
while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
file_offset))) {
if (!(vmo_link = vma_next_link(&vmo_link->list,
&vma->vm_obj_list))) {
printf("%s:%s: Traversed all shadows and the original "
"file's vm_object, but could not find the "
"faulty page in this vma.\n",__TASKNAME__,
__FUNCTION__);
BUG();
}
}
/*
* Copy the page. This traverse and copy is like a page-in operation
* of a pager, except that the page is moving along vm_objects.
*/
new_page = copy_to_new_page(page);
/* Update page details */
spin_lock(&new_page->lock);
BUG_ON(!list_empty(&new_page->list));
new_page->refcnt = 0;
new_page->owner = shadow_link->obj;
new_page->offset = file_offset;
new_page->virtual = 0;
	spin_unlock(&new_page->lock);
/* Add the page to owner's list of in-memory pages */
insert_page_olist(new_page, new_page->owner);
new_page->owner->npages++;
mm0_test_global_vm_integrity();
	/* Shared vmas don't have shadows, so we don't look for collapses */
if (!(vma->flags & VMA_SHARED)) {
/*
* Finished handling the actual fault, now check for possible
* shadow collapses. Does the shadow completely shadow the one
* underlying it?
*/
if (!(vmo_link = vma_next_link(&shadow_link->list,
&vma->vm_obj_list))) {
/* Copier must have an object under it */
printf("Copier must have had an object under it!\n");
BUG();
}
if (vm_object_is_droppable(shadow_link->obj, vmo_link->obj))
vma_drop_merge_delete(vma, vmo_link);
}
return new_page;
}
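/*
 * Object chain before and after the first write fault on a private,
 * read-only-backed vma, as handled above (illustrative):
 *
 * before:  vma --> link0              after:  vma --> link0 --> link1
 *                    |                                  |         |
 *                    v                                  v         v
 *                  file (RO)                      shadow (RW)   file (RO)
 *                                                 [copied page]
 *
 * The new shadow is inserted in front of the original object, added to
 * the original's shdw_list (bumping its shadow count), and the copied
 * page is inserted into the shadow's page cache at the faulting file
 * offset.
 */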
/*
* Handles the page fault, all entries here are assumed *legal*
* faults, i.e. do_page_fault() should have already checked
* for illegal accesses.
*
* NOTE:
* Anon/Shared pages:
 * First access from the first process is COW. All subsequent RW
 * accesses (which are attempts at *sharing*) simply map that
 * page into the faulting processes.
*
* Non-anon/shared pages:
* First access from first process simply writes to the pages
* of that file. All subsequent accesses by other processes
* do so as well.
*
* FIXME: Add VM_DIRTY bit for every page that has write-faulted.
*/
/* Handle read faults */
struct page *page_read_fault(struct fault_data *fault)
{
struct vm_area *vma = fault->vma;
struct vm_obj_link *vmo_link;
unsigned long file_offset;
struct page *page = 0;
file_offset = fault_to_file_offset(fault);
/* Get the first object, either original file or a shadow */
if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
printf("%s:%s: No vm object in vma!\n",
__TASKNAME__, __FUNCTION__);
BUG();
}
/* Traverse the list of read-only vm objects and search for the page */
while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
file_offset))) {
if (!(vmo_link = vma_next_link(&vmo_link->list,
&vma->vm_obj_list))) {
printf("%s:%s: Traversed all shadows and the original "
"file's vm_object, but could not find the "
"faulty page in this vma.\n",__TASKNAME__,
__FUNCTION__);
BUG();
}
}
BUG_ON(!page);
return page;
}
struct page *page_write_fault(struct fault_data *fault)
{
unsigned int vma_flags = fault->vma->flags;
struct vm_area *vma = fault->vma;
struct vm_obj_link *vmo_link;
unsigned long file_offset;
struct page *page = 0;
/* Copy-on-write. All private vmas are always COW */
if (vma_flags & VMA_PRIVATE) {
BUG_ON(IS_ERR(page = copy_on_write(fault)));
/*
* This handles shared pages that are both anon and non-anon.
*/
} else if ((vma_flags & VMA_SHARED)) {
file_offset = fault_to_file_offset(fault);
/* Don't traverse, just take the first object */
BUG_ON(!(vmo_link = vma_next_link(&vma->vm_obj_list,
&vma->vm_obj_list)));
/* Get the page from its pager */
if (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
file_offset))) {
/*
			 * A writable page does not exist. If the vma
			 * is anonymous, the page needs to be COW'ed;
			 * otherwise the file should have paged this
			 * page in already, so it's a bug.
*/
if (vma_flags & VMA_ANONYMOUS) {
BUG_ON(IS_ERR(page = copy_on_write(fault)));
return page;
} else {
printf("%s: Could not obtain faulty "
"page from regular file.\n",
__TASKNAME__);
BUG();
}
}
/*
* Page and object are now dirty. Currently it's
* only relevant for file-backed shared objects.
*/
page->flags |= VM_DIRTY;
page->owner->flags |= VM_DIRTY;
} else
BUG();
return page;
}
struct page *__do_page_fault(struct fault_data *fault)
{
unsigned int reason = fault->reason;
unsigned int pte_flags = fault->pte_flags;
unsigned int map_flags = 0;
struct page *page = 0;
if ((reason & VM_READ) && (pte_flags & VM_NONE)) {
page = page_read_fault(fault);
map_flags = MAP_USR_RO;
} else if ((reason & VM_WRITE) && (pte_flags & VM_NONE)) {
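		/* Page the backing data in first, then take the write path (e.g. COW) */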
page = page_read_fault(fault);
page = page_write_fault(fault);
map_flags = MAP_USR_RW;
} else if ((reason & VM_EXEC) && (pte_flags & VM_NONE)) {
page = page_read_fault(fault);
map_flags = MAP_USR_RX;
} else if ((reason & VM_EXEC) && (pte_flags & VM_READ)) {
		/* Retrieve the already paged-in page */
page = page_read_fault(fault);
if (pte_flags & VM_WRITE)
map_flags = MAP_USR_RWX;
else
map_flags = MAP_USR_RX;
} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)) {
page = page_write_fault(fault);
if (pte_flags & VM_EXEC)
map_flags = MAP_USR_RWX;
else
map_flags = MAP_USR_RW;
} else {
printf("mm0: Unhandled page fault.\n");
BUG();
}
BUG_ON(!page);
/* Map the new page to faulty task */
l4_map((void *)page_to_phys(page),
(void *)page_align(fault->address), 1,
map_flags, fault->task->tid);
// vm_object_print(page->owner);
return page;
}
/*
* Sets all r/w shadow objects as read-only for the process
* so that as expected after a fork() operation, writes to those
* objects cause copy-on-write events.
*/
int vm_freeze_shadows(struct tcb *task)
{
unsigned long virtual;
struct vm_area *vma;
struct vm_obj_link *vmo_link;
struct vm_object *vmo;
struct page *p;
list_foreach_struct(vma, &task->vm_area_head->list, list) {
/* Shared vmas don't have shadows */
if (vma->flags & VMA_SHARED)
continue;
/* Get the first object */
BUG_ON(list_empty(&vma->vm_obj_list));
vmo_link = link_to_struct(vma->vm_obj_list.next,
struct vm_obj_link, list);
vmo = vmo_link->obj;
/*
* Is this a writeable shadow?
*
* The only R/W shadow in a vma object chain
		 * can be the first one, so we don't check further
		 * objects if the first one is not what we want.
*/
if (!((vmo->flags & VM_OBJ_SHADOW) &&
(vmo->flags & VM_WRITE)))
continue;
/* Make the object read only */
vmo->flags &= ~VM_WRITE;
vmo->flags |= VM_READ;
/*
* Make all pages on it read-only
* in the page tables.
*/
list_foreach_struct(p, &vmo->page_cache, list) {
/* Find virtual address of each page */
virtual = vma_page_to_virtual(vma, p);
/* Map the page as read-only */
l4_map((void *)page_to_phys(p),
(void *)virtual, 1,
MAP_USR_RO, task->tid);
}
}
return 0;
}
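/*
 * Illustrative sketch, the real fork path lives outside this file: a
 * fork-style address space duplication would typically copy each parent
 * vma's object chain into a corresponding child vma with vma_copy_links()
 * and then call vm_freeze_shadows() on the parent, so that later writes
 * from either task take the copy_on_write() path. The function name and
 * the copy_vma_descriptor() helper below are hypothetical.
 */
#if 0
int task_copy_vmas_sketch(struct tcb *parent, struct tcb *child)
{
	struct vm_area *vma, *new_vma;

	list_foreach_struct(vma, &parent->vm_area_head->list, list) {
		/* Hypothetical helper that duplicates the vm_area fields
		 * and initialises an empty vm_obj_list on the copy */
		if (!(new_vma = copy_vma_descriptor(vma)))
			return -ENOMEM;

		/* Share the parent's object chain with the child's vma */
		vma_copy_links(new_vma, vma);
		list_insert_tail(&new_vma->list, &child->vm_area_head->list);
	}

	/* Parent's r/w shadows become read-only; writes now COW */
	return vm_freeze_shadows(parent);
}
#endif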
/*
* Page fault model:
*
* A page is anonymous (e.g. stack)
* - page needs read access:
* action: map the zero page.
* - page needs write access:
* action: allocate ZI page and map that. Swap file owns the page.
* - page is swapped to swap:
* action: read back from swap file into new page.
*
* A page is file-backed but private (e.g. .data section)
* - page needs read access:
* action: read the page from its file.
* - page is swapped out before being private. (i.e. invalidated)
* action: read the page from its file. (original file)
* - page is swapped out after being private.
* action: read the page from its file. (swap file)
* - page needs write access:
* action: allocate new page, declare page as private, change its
* owner to swap file.
*
* A page is file backed but not-private, and read-only. (e.g. .text section)
* - page needs read access:
* action: read in the page from its file.
* - page is swapped out. (i.e. invalidated)
* action: read in the page from its file.
* - page needs write access:
* action: forbidden, kill task?
*
* A page is file backed but not-private, and read/write. (e.g. any data file.)
* - page needs read access:
* action: read in the page from its file.
* - page is flushed back to its original file. (i.e. instead of swap)
* action: read in the page from its file.
* - page needs write access:
* action: read the page in, give write access.
*/
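/*
 * To tie the model above to the handlers in this file, consider a first
 * write to an untouched page of a private anonymous region such as the
 * stack: the fault arrives with reason = VM_WRITE and pte_flags = VM_NONE,
 * so __do_page_fault() first calls page_read_fault() to page the backing
 * (zero) data in, then page_write_fault(), which takes the copy_on_write()
 * path and installs the copied page in the vma's top read/write shadow
 * before the page is mapped with MAP_USR_RW.
 */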
struct page *do_page_fault(struct fault_data *fault)
{
unsigned int vma_flags = (fault->vma) ? fault->vma->flags : VM_NONE;
unsigned int reason = fault->reason;
/* vma flags show no access */
if (vma_flags & VM_NONE) {
printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x,\n",
fault->task->tid, fault->address, fault->kdata->faulty_pc);
fault_handle_error(fault);
}
/* The access reason is not included in the vma's listed flags */
if (!(reason & vma_flags)) {
printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
fault->task->tid, fault->address, fault->kdata->faulty_pc);
fault_handle_error(fault);
}
/* Handle legitimate faults */
return __do_page_fault(fault);
}
struct page *page_fault_handler(struct tcb *sender, fault_kdata_t *fkdata)
{
struct fault_data fault = {
/* Fault data from kernel */
.kdata = fkdata,
.task = sender,
};
/* Extract fault reason, fault address etc. in generic format */
set_generic_fault_params(&fault);
/* Get vma info */
if (!(fault.vma = find_vma(fault.address,
&fault.task->vm_area_head->list)))
printf("Hmm. No vma for faulty region. "
"Bad things will happen.\n");
/* Handle the actual fault */
return do_page_fault(&fault);
}
static inline unsigned int pte_to_map_flags(unsigned int pte_flags)
{
unsigned int map_flags;
switch(pte_flags) {
case VM_READ:
map_flags = MAP_USR_RO;
break;
case (VM_READ | VM_WRITE):
map_flags = MAP_USR_RW;
break;
case (VM_READ | VM_WRITE | VM_EXEC):
map_flags = MAP_USR_RWX;
break;
case (VM_READ | VM_EXEC):
map_flags = MAP_USR_RX;
break;
default:
BUG();
}
return map_flags;
}
/*
 * Prefaults a page of a task. The catch is that the page may already
 * have been faulted further than the wanted flags alone would take
 * it (e.g. read-faulting a page that has already been
 * copy-on-write'd).
 *
 * This function detects how far the fault has already progressed by
 * inspecting the state of the vma's vm_object chain.
 *
 * Both the read-fault and the write-fault paths are repeatable, in
 * the sense that an already faulted page may be safely re-faulted
 * again and again, be it a read-only or a copy-on-write'd page.
 *
 * Retrieving the same page repeatedly is thus safe, but it would be
 * wrong to downgrade or otherwise change the mapping permissions of
 * the page along the way, e.g. to make a copy-on-write'd page
 * read-only again by blindly read-faulting it.
 *
 * Hence this function works out the protection flags the page must
 * already have from the object chain and never maps it with weaker
 * permissions than those.
*
* FIXME: Escalate any page fault errors like a civilized function!
*/
struct page *task_prefault_smart(struct tcb *task, unsigned long address,
unsigned int wanted_flags)
{
struct vm_obj_link *vmo_link;
unsigned long file_offset;
unsigned int vma_flags, pte_flags;
struct vm_area *vma;
struct page *page;
int err;
struct fault_data fault = {
.task = task,
.address = address,
};
/* Find the vma */
if (!(fault.vma = find_vma(fault.address,
&fault.task->vm_area_head->list))) {
dprintf("%s: Invalid: No vma for given address. %d\n",
__FUNCTION__, -EINVAL);
return PTR_ERR(-EINVAL);
}
	/* Read fault, safe to repeat */
if (wanted_flags & VM_READ)
if (IS_ERR(page = page_read_fault(&fault)))
return page;
	/* Write fault, safe to repeat */
if (wanted_flags & VM_WRITE)
if (IS_ERR(page = page_write_fault(&fault)))
return page;
/*
* If we came this far, it means we have more
* permissions than VM_NONE.
*
* Now we _must_ find out what those page
* protection flags were, and do this without
* needing to inspect any ptes.
*
* We don't want to downgrade a RW page to RO again.
*/
file_offset = fault_to_file_offset(&fault);
vma_flags = fault.vma->flags;
vma = fault.vma;
/* Get the topmost vm_object */
if (!(vmo_link = vma_next_link(&vma->vm_obj_list,
&vma->vm_obj_list))) {
printf("%s:%s: No vm object in vma!\n",
__TASKNAME__, __FUNCTION__);
BUG();
}
/* Traverse the list of vm objects and search for the page */
while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
file_offset))) {
if (!(vmo_link = vma_next_link(&vmo_link->list,
&vma->vm_obj_list))) {
printf("%s:%s: Traversed all shadows and the original "
"file's vm_object, but could not find the "
"faulty page in this vma.\n",__TASKNAME__,
__FUNCTION__);
BUG();
}
}
/* Use flags for the vm_object containing the page */
if (vmo_link->obj->flags & VM_WRITE)
pte_flags = VM_WRITE | VM_READ;
else
pte_flags = VM_READ;
/*
	 * Now check the vma flags to decide whether to add VM_EXEC.
	 * The real pte may not have this flag yet, but it is allowed
	 * to have it, and adding it does no harm.
*/
if (vma_flags & VM_EXEC)
pte_flags |= VM_EXEC;
/* Map the page to task using these flags */
if ((err = l4_map((void *)page_to_phys(page),
(void *)page_align(fault.address), 1,
pte_to_map_flags(pte_flags),
fault.task->tid)) < 0) {
printf("l4_map() failed. err=%d\n", err);
BUG();
}
return page;
}
/*
 * Prefaults the page at the given virtual address into the given
 * task, for the given reasons. Multiple reasons are allowed; they
 * are handled separately, in order.
*/
struct page *task_prefault_page(struct tcb *task, unsigned long address,
unsigned int vmflags)
{
struct page *ret;
perfmon_reset_start_cyccnt();
ret = task_prefault_smart(task, address, vmflags);
debug_record_cycles("task_prefault_smart");
return ret;
#if 0
struct page *p;
struct fault_data fault = {
.task = task,
.address = address,
};
dprintf("Pre-faulting address 0x%lx, on task %d, with flags: 0x%x\n",
address, task->tid, vmflags);
/* Find the vma */
if (!(fault.vma = find_vma(fault.address,
&fault.task->vm_area_head->list))) {
dprintf("%s: Invalid: No vma for given address. %d\n",
__FUNCTION__, -EINVAL);
return PTR_ERR(-EINVAL);
}
/* Flags may indicate multiple fault reasons. First do the read */
if (vmflags & VM_READ) {
fault.pte_flags = VM_NONE;
fault.reason = VM_READ;
if (IS_ERR(p = do_page_fault(&fault)))
return p;
}
/* Now write */
if (vmflags & VM_WRITE) {
fault.pte_flags = VM_READ;
fault.reason = VM_WRITE;
if (IS_ERR(p = do_page_fault(&fault)))
return p;
}
/* No exec or any other fault reason allowed. */
BUG_ON(vmflags & ~(VM_READ | VM_WRITE));
return p;
#endif
}
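/*
 * Usage sketch (illustrative, the address is made up): a pager setting up
 * another task's address space, e.g. before writing argument data into a
 * freshly mapped region, can force the page in ahead of time instead of
 * waiting for the task to fault on it:
 *
 *	struct page *p = task_prefault_page(task, 0x40001000, VM_READ | VM_WRITE);
 *
 *	if (IS_ERR(p))
 *		... handle the error ...
 *
 * On success the page is already mapped into the task with at least
 * read/write permissions, as worked out by task_prefault_smart() above.
 */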
int vm_compare_prot_flags(unsigned int current, unsigned int needed)
{
current &= VM_PROT_MASK;
needed &= VM_PROT_MASK;
if (needed & VM_READ)
if (current & (VM_READ | VM_WRITE))
return 1;
if (needed & VM_WRITE &&
(current & VM_WRITE))
return 1;
return 0;
}