Initial efforts to add copy-on-write and shadow vmas.

Bahadir Balban
2008-03-07 18:35:40 +00:00
parent 98b5c217f5
commit 7a54d722a7
10 changed files with 385 additions and 142 deletions

View File

@@ -31,6 +31,15 @@
 #define VMA_COW (1 << 7)
 #define VMA_FIXED (1 << 8)
 
+/*
+ * A suggestion for how a non-page_array (i.e. a device)
+ * page could tell its physical address.
+ */
+struct devpage {
+	struct page page;
+	unsigned long phys;
+};
+
 struct page {
 	int count;		/* Refcount */
 	struct spinlock lock;	/* Page lock. */
@@ -38,7 +47,7 @@ struct page {
 	struct vm_object *owner;/* The vm_object the page belongs to */
 	unsigned long virtual;	/* If refs >1, first mapper's virtual address */
 	unsigned int flags;	/* Flags associated with the page. */
-	unsigned long f_offset;	/* The offset page resides in its owner */
+	unsigned long offset;	/* The offset page resides in its owner */
 };
 
 extern struct page *page_array;
@@ -59,8 +68,8 @@ struct fault_data {
 };
 
 struct vm_pager_ops {
-	int (*page_in)(struct vm_object *vm_obj, unsigned long pfn_offset);
-	int (*page_out)(struct vm_object *vm_obj, unsigned long pfn_offset);
+	struct page *(*page_in)(struct vm_object *vm_obj, unsigned long pfn_offset);
+	struct page *(*page_out)(struct vm_object *vm_obj, unsigned long pfn_offset);
 };
 
 /* Describes the pager task that handles a vm_area. */
@@ -120,14 +129,14 @@ struct vm_file {
 	void *priv_data;	/* Device pagers use to access device info */
 };
 
 /* To create per-vma vm_object lists */
 struct vma_obj_link {
 	struct list_head list;
 	struct vm_object *obj;
-}
+};
 
-#define vm_object_to_file(obj) \
-	(struct vm_file *)container_of(obj, struct vm_file, vm_obj)
+#define vm_object_to_file(obj) container_of(obj, struct vm_file, vm_obj)
 
 /*
  * Describes a virtually contiguous chunk of memory region in a task. It covers
@@ -146,7 +155,7 @@ struct vm_area {
 	unsigned long pfn_start;	/* Region start virtual pfn */
 	unsigned long pfn_end;		/* Region end virtual pfn, exclusive */
 	unsigned long flags;		/* Protection flags. */
-	unsigned long f_offset;		/* File offset in pfns */
+	unsigned long file_offset;	/* File offset in pfns */
 };
 
 static inline struct vm_area *find_vma(unsigned long addr,
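The vm_pager_ops change above means pagers now hand back the struct page itself, with failure encoded into the returned pointer rather than an int return code. A minimal caller-side sketch of the new convention (get_page_at is a hypothetical name; IS_ERR and the pager wiring follow their uses elsewhere in this commit):

	struct page *get_page_at(struct vm_object *vmo, unsigned long pfn_offset)
	{
		struct page *page = vmo->pager->ops.page_in(vmo, pfn_offset);

		/* Failure comes back encoded in the pointer, not as an int. */
		if (IS_ERR(page))
			BUG();	/* Or propagate the encoded error to the caller */

		return page;
	}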

View File

@@ -1,27 +1,33 @@
 /*
  * Copyright (C) 2008 Bahadir Balban
  */
 #include <l4/lib/list.h>
 #include <vm_area.h>
 #include <kmalloc/kmalloc.h>
 
+/*
+ * This is as yet unused; it is more of an anticipation
+ * of how mmap'd devices would be mapped with a pager.
+ */
 struct mmap_device {
 	struct list_head page_list;	/* Dyn-allocated page list */
 	unsigned long pfn_start;	/* Physical pfn start */
 	unsigned long pfn_end;		/* Physical pfn end */
 };
 
-struct page *mmap_device_page_in(struct vm_object *vm_obj,
-				 unsigned long pfn_offset)
+struct page *memdev_page_in(struct vm_object *vm_obj,
+			    unsigned long pfn_offset)
 {
-	struct vm_file *f = vm_obj_to_file(vm_obj);
-	struct mmap_device *mmdev = f->private_data;
+	struct vm_file *f = vm_object_to_file(vm_obj);
+	struct mmap_device *memdev = f->priv_data;
 	struct page *page;
 
 	/* Check if it's within the device boundary */
-	if (pfn_offset >= mmdev->pfn_end - mmdev->pfn_start)
-		return -1;
+	if (pfn_offset >= memdev->pfn_end - memdev->pfn_start)
+		return PTR_ERR(-1);
 
 	/* Simply return the page if found */
-	list_for_each_entry(page, &mmdev->page_list, list)
+	list_for_each_entry(page, &memdev->page_list, list)
 		if (page->offset == pfn_offset)
 			return page;
@@ -31,15 +37,16 @@ struct page *memdev_page_in(struct vm_object *vm_obj,
 	spin_lock_init(&page->lock);
 	page->offset = pfn_offset;
 	page->owner = vm_obj;
 	page->flags = DEVICE_PAGE;
-	list_add(&page->list, &mmdev->page_list)
+	list_add(&page->list, &memdev->page_list);
 
 	return page;
 }
 
 /* All mmapable devices are handled by this */
-struct vm_pager mmap_device_pager {
-	.page_in = mmap_device_page_in,
+struct vm_pager memdev_pager = {
+	.ops = {
+		.page_in = memdev_page_in,
+	},
 };
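For context, a device-backed vm_file would be attached to this pager much as boot files are attached to bootfile_pager later in this commit. A sketch under that assumption (memdev_file_create is a hypothetical helper, not part of the commit):

	/* Hypothetical helper: wrap an mmap_device in a mappable vm_file. */
	struct vm_file *memdev_file_create(struct mmap_device *memdev)
	{
		struct vm_file *f;

		if (!(f = vm_file_alloc_init()))
			return 0;

		f->priv_data = memdev;			/* memdev_page_in reads the pfn range from here */
		f->vm_obj.type = VM_OBJ_FILE;
		f->vm_obj.pager = &memdev_pager;	/* All mmapable devices share this pager */

		return f;
	}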

View File

@@ -30,7 +30,7 @@ unsigned long fault_to_file_offset(struct fault_data *fault)
 	unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;
 
 	/* Fault's offset in the file */
-	unsigned long f_off_pfn = fault->vma->f_offset + vma_off_pfn;
+	unsigned long f_off_pfn = fault->vma->file_offset + vma_off_pfn;
 
 	return f_off_pfn;
 }
@@ -83,6 +83,211 @@ struct vm_area *copy_on_write_vma(struct fault_data *fault)
 	return shadow;
 }
 
+/*
+ * Given a reference to a vm_object link, obtains the previous vm_object.
+ *
+ * vma->link1->link2->link3
+ *       |      |      |
+ *       V      V      V
+ *      vmo1   vmo2   vmo3|vm_file
+ *
+ * E.g. given a reference to vma's list, obtains vmo3.
+ */
+struct vm_object *vma_get_prev_object(struct list_head *linked_list)
+{
+	struct vm_obj_link *link;
+
+	BUG_ON(list_empty(linked_list));
+	link = list_entry(linked_list->prev, struct vm_obj_link, list);
+
+	return link->obj;
+}
+
+/* Obtain the original mmap'ed object */
+struct vm_object *vma_get_original_object(struct vm_area *vma)
+{
+	return vma_get_prev_object(&vma->vm_obj_list);
+}
+
+/*
+ * Given a reference to a vm_object link, obtains the next vm_object.
+ *
+ * vma->link1->link2->link3
+ *       |      |      |
+ *       V      V      V
+ *      vmo1   vmo2   vmo3|vm_file
+ *
+ * E.g. given a reference to vma's list, obtains vmo1.
+ */
+struct vm_object *vma_get_next_object(struct list_head *linked_list)
+{
+	struct vm_obj_link *link;
+
+	BUG_ON(list_empty(linked_list));
+	link = list_entry(linked_list->next, struct vm_obj_link, list);
+
+	return link->obj;
+}
+
+struct vm_obj_link *vma_create_shadow(void)
+{
+	struct vm_object *vmo;
+	struct vm_obj_link *vmo_link;
+
+	if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
+		return 0;
+
+	if (!(vmo = vm_object_alloc_init())) {
+		kfree(vmo_link);
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&vmo_link->list);
+	vmo_link->obj = vmo;
+
+	return vmo_link;
+}
+
+struct page *copy_on_write_page_in(struct vm_object *shadow,
+				   struct vm_object *orig,
+				   unsigned long page_offset)
+{
+	struct page *page, *new_page;
+	void *vaddr, *new_vaddr, *new_paddr;
+
+	/* If the copy is already resident in the shadow's cache, use it. */
+	if ((page = find_page(shadow, page_offset)))
+		return page;
+
+	/* Allocate a new page */
+	new_paddr = alloc_page(1);
+	new_page = phys_to_page(new_paddr);
+
+	/* Get the faulty page from the original vm object. */
+	if (IS_ERR(page = orig->pager->ops.page_in(orig, page_offset))) {
+		printf("%s: Could not obtain faulty page.\n",
+		       __TASKNAME__);
+		BUG();
+	}
+
+	/* Map the new and original pages to self */
+	new_vaddr = l4_map_helper(new_paddr, 1);
+	vaddr = l4_map_helper(page_to_phys(page), 1);
+
+	/* Copy the original page into the new page */
+	memcpy(new_vaddr, vaddr, PAGE_SIZE);
+
+	/* Unmap both pages from the current task. */
+	l4_unmap_helper(vaddr, 1);
+	l4_unmap_helper(new_vaddr, 1);
+
+	/* Update vm object details */
+	shadow->npages++;
+
+	/* Update the copy's page details */
+	spin_lock(&new_page->lock);
+	new_page->count++;
+	new_page->owner = shadow;
+	new_page->offset = page_offset;
+	new_page->virtual = 0;
+
+	/* Add the copy to its owner's list of in-memory pages */
+	BUG_ON(!list_empty(&new_page->list));
+	insert_page_olist(new_page, shadow);
+	spin_unlock(&new_page->lock);
+
+	return new_page;
+}
+
+int copy_on_write(struct fault_data *fault)
+{
+	unsigned long file_offset = fault_to_file_offset(fault);
+	struct vm_obj_link *vmo_link;
+	struct vm_object *vmo, *shadow;
+	struct page *page;
+
+	vmo = vma_get_original_object(fault->vma);
+	BUG_ON(vmo->type != VM_OBJ_FILE);
+
+	/* No shadows on this yet. Create a new one. */
+	if (list_empty(&vmo->shadows)) {
+		if (!(vmo_link = vma_create_shadow()))
+			return -ENOMEM;
+
+		/* Initialise the shadow */
+		shadow = vmo_link->obj;
+		shadow->vma_refcnt = 1;
+		shadow->orig_obj = vmo;
+		shadow->type = VM_OBJ_SHADOW;
+		shadow->pager = &swap_pager;
+
+		/*
+		 * Add the shadow in front of the original:
+		 *
+		 * vma->link0->link1
+		 *       |      |
+		 *       V      V
+		 *    shadow  original
+		 */
+		list_add(&vmo_link->list, &fault->vma->vm_obj_list);
+	} else	/* Reuse the front-most (newest) shadow */
+		shadow = vma_get_next_object(&fault->vma->vm_obj_list);
+
+	/*
+	 * A transitional page-in operation that does the actual
+	 * copy-on-write from the original object into the shadow.
+	 */
+	page = copy_on_write_page_in(shadow, vmo, file_offset);
+
+	/* Map the copy to the faulty task, writably this time */
+	l4_map(page_to_phys(page), (void *)page_align(fault->address), 1,
+	       MAP_USR_RW_FLAGS, fault->task->tid);
+
+	return 0;
+}
+
+/*
+ * Handles the page fault; all entries here are assumed to be *legal* faults,
+ * i.e. do_page_fault() should have already checked for illegal accesses.
+ */
+int __do_page_fault(struct fault_data *fault)
+{
+	unsigned int reason = fault->reason;
+	unsigned int vma_flags = fault->vma->flags;
+	unsigned int pte_flags = vm_prot_flags(fault->kdata->pte);
+	struct vm_object *vmo;
+	struct page *page;
+
+	/* Handle read faults */
+	if ((reason & VM_READ) && (pte_flags & VM_NONE)) {
+		unsigned long file_offset = fault_to_file_offset(fault);
+
+		vmo = vma_get_next_object(&fault->vma->vm_obj_list);
+
+		/* Get the page from its pager */
+		if (IS_ERR(page = vmo->pager->ops.page_in(vmo, file_offset))) {
+			printf("%s: Could not obtain faulty page.\n",
+			       __TASKNAME__);
+			BUG();
+		}
+
+		/* Map it to the faulty task, read-only for now */
+		l4_map(page_to_phys(page), (void *)page_align(fault->address), 1,
+		       (reason & VM_READ) ? MAP_USR_RO_FLAGS : MAP_USR_RW_FLAGS,
+		       fault->task->tid);
+	}
+
+	/* Handle write faults */
+	if ((reason & VM_WRITE) && (pte_flags & VM_READ)) {
+		/* Copy-on-write for private vmas */
+		if (vma_flags & VMA_PRIVATE)
+			copy_on_write(fault);
+	}
+
+	return 0;
+}
 
 /*
  * Handles any page ownership change or allocation for file-backed pages.
  */
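The list helpers introduced in this hunk are easiest to picture on a vma's object chain, where the front-most link is the most recent shadow and the last link is the original mmap'd file object. A hypothetical debug sketch, assuming the vm_obj_link layout from the header change above:

	/* Hypothetical helper: dump a vma's object chain, front to back. */
	void dump_object_chain(struct vm_area *vma)
	{
		struct vm_obj_link *link;

		/* The first entry is the newest shadow, the last the vm_file. */
		list_for_each_entry(link, &vma->vm_obj_list, list)
			printf("%s: object type: %s\n", __TASKNAME__,
			       link->obj->type == VM_OBJ_SHADOW ? "shadow" : "file");
	}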
@@ -416,7 +621,6 @@ int do_page_fault(struct fault_data *fault)
 		BUG(); /* Can't handle this yet. */
 	}
 
-	/* Handle legitimate read faults on the vma */
 	if (vma_flags & VMA_ANON)
 		err = do_anon_page(fault);
 	else
@@ -427,82 +631,6 @@ int do_page_fault(struct fault_data *fault)
 	return 0;
 }
 
-int file_pager_read_page(struct vm_file *f, unsigned long f_offset, void *dest_page)
-{
-	int err;
-
-	/* Map the page to vfs task (shared mapping) */
-	l4_map(virt_to_phys(dest_page), dest_page, 1, MAP_USR_RW_FLAGS, VFS_TID);
-
-	/* vfs reads into the page. */
-	err = vfs_read(f->vnum, f_offset, 1, dest_page);
-
-	/* Unmap it from vfs */
-	l4_unmap(dest_page, 1, VFS_TID);
-
-	return err;
-}
-
-int file_pager_write_page(struct vm_file *f, unsigned long f_offset, void *src_page)
-{
-	int err;
-
-	/* Map the page to vfs task (shared mapping) */
-	l4_map(virt_to_phys(src_page), src_page, 1, MAP_USR_RW_FLAGS, VFS_TID);
-
-	/* write the page via vfs. */
-	err = vfs_write(f->vnum, f_offset, 1, src_page);
-
-	/* Unmap it from vfs */
-	l4_unmap(src_page, 1, VFS_TID);
-
-	return err;
-}
-
-int boot_pager_read_page(struct vm_file *f, unsigned long f_off_pfn,
-			 void *dest_page)
-{
-	/* The address of page in the file */
-	void *file_page = (void *)(f->vnum + __pfn_to_addr(f_off_pfn));
-
-	/*
-	 * Map the memfile's page into virtual memory.
-	 *
-	 * FIXME: Need to find a way of properly generating virtual addresses
-	 * rather than one-to-one conversion.
-	 */
-	file_page = l4_map_helper(file_page, 1);
-
-	/* Copy it into destination page */
-	memcpy(dest_page, file_page, PAGE_SIZE);
-
-	return 0;
-}
-
-/* Pager for boot files read from sys_kdata() */
-struct vm_pager boot_file_pager = {
-	.ops = {
-		.read_page = boot_pager_read_page,
-		.write_page = 0,
-	},
-};
-
-/* Pager for file pages */
-struct vm_pager default_file_pager = {
-	.ops = {
-		.read_page = file_pager_read_page,
-		.write_page = 0,
-	},
-};
-
-/* Swap pager for anonymous and private pages */
-struct vm_pager swap_pager = {
-	.ops = {
-		.read_page = 0,
-		.write_page = 0,
-	},
-};
-
 void page_fault_handler(l4id_t sender, fault_kdata_t *fkdata)
 {
 	struct fault_data fault = {
View File

@@ -111,16 +111,6 @@ int vfs_receive_sys_open(l4id_t sender, l4id_t opener, int fd,
 	return 0;
 }
 
-struct page *find_page(struct vm_file *f, unsigned long pfn)
-{
-	struct page *p;
-
-	list_for_each_entry(p, &f->page_cache_list, list)
-		if (p->f_offset == pfn)
-			return p;
-
-	return 0;
-}
-
 /*
  * Inserts the page to vmfile's list in order of page frame offset.

View File

@@ -4,11 +4,11 @@
  * Copyright (C) 2007 Bahadir Balban
  */
 #include <stdio.h>
-#include <l4lib/arch/syscalls.h>
 #include <kdata.h>
 #include <string.h>
 #include <init.h>
 #include INC_API(kip.h)
+#include <kmalloc/kmalloc.h>
+#include <l4lib/arch/syscalls.h>
 
 /* Kernel data acquired during initialisation */
 struct initdata initdata;

View File

@@ -377,7 +377,6 @@ int vma_intersect(unsigned long pfn_start, unsigned long pfn_end,
 		printf("%s: VMAs overlap.\n", __FUNCTION__);
 		return 1;
 	}
-
 	if ((pfn_end >= vma->pfn_end) && (pfn_start < vma->pfn_end)) {
 		printf("%s: VMAs overlap.\n", __FUNCTION__);
 		return 1;
@@ -405,7 +404,7 @@ unsigned long find_unmapped_area(unsigned long npages, struct tcb *task)
 	vma = list_entry(&task->vm_area_head.next, struct vm_area, list);
 
 	/* Start searching from task's end of data to start of stack */
-	while (pfn_end < __pfn(task->map_end)) {
+	while (pfn_end <= __pfn(task->map_end)) {
 		/* If intersection, skip the vma and fast-forward to next */
 		if (vma_intersection(pfn_start, pfn_end, vma)) {
@@ -485,7 +484,7 @@ int do_mmap(struct vm_file *mapfile, unsigned long file_offset, struct tcb *task
 			  find_unmapped_area(npages, task)) < 0)
 			return (int)map_address;
-		/* Create a new vma for new address */
+		/* Create a new vma for newly allocated address */
 		else if (!(vma_new = vma_new(__pfn(map_address), npages,
 					     flags, file_offset, mapfile)))
 			return -ENOMEM;
@@ -493,8 +492,10 @@ int do_mmap(struct vm_file *mapfile, unsigned long file_offset, struct tcb *task
 		goto out_success;
 	}
 
-	/* FIXME: Currently we don't allow overlapping vmas. To be fixed soon */
-	/* FIXME: Handle intersection, splitting, shrink/grow etc. */
+	/*
+	 * FIXME: Currently we don't allow overlapping vmas. To be fixed soon.
+	 * We need to handle intersection, splitting, shrink/grow etc.
+	 */
 	list_for_each_entry(vma_mapped, &task->vm_area_list, list)
 		BUG_ON(vma_intersect(map_pfn, map_pfn + npages, vma_mapped));
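The conditions checked in vma_intersect above reduce to the standard half-open interval test; a standalone rendering for reference (pfn_ranges_intersect is illustrative, not part of the commit):

	/*
	 * Two pfn ranges [a_start, a_end) and [b_start, b_end) intersect
	 * iff each one starts before the other one ends.
	 */
	static inline int pfn_ranges_intersect(unsigned long a_start, unsigned long a_end,
					       unsigned long b_start, unsigned long b_end)
	{
		return a_start < b_end && b_start < a_end;
	}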

View File

@@ -1,18 +1,90 @@
 /*
  * Copyright (C) 2008 Bahadir Balban
  */
 #include <l4/macros.h>
 #include <l4/lib/list.h>
 #include <l4lib/arch/syscalls.h>
 #include <l4lib/arch/syslib.h>
 #include <mm/alloc_page.h>
 #include <vm_area.h>
 #include <string.h>
 #include <file.h>
 #include <init.h>
 #include INC_ARCH(bootdesc.h)
 
+struct page *find_page(struct vm_object *obj, unsigned long pfn)
+{
+	struct page *p;
+
+	list_for_each_entry(p, &obj->page_cache, list)
+		if (p->offset == pfn)
+			return p;
+
+	return 0;
+}
+
+struct page *copy_on_write_page_in(struct vm_object *vm_obj, unsigned long page_offset)
+{
+	/* Read from the object behind this shadow, i.e. the mmap'ed file */
+	struct vm_file *f = vm_object_to_file(vm_obj->orig_obj);
+	struct page *page;
+	void *vaddr, *paddr;
+	int err;
+
+	/* The page is not resident in the page cache. */
+	if (!(page = find_page(vm_obj, page_offset))) {
+		/* Allocate a new page */
+		paddr = alloc_page(1);
+		vaddr = phys_to_virt(paddr);
+		page = phys_to_page(paddr);
+
+		/* Map the page to the vfs task */
+		l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, VFS_TID);
+
+		/* Syscall to vfs to read into the page. */
+		if ((err = vfs_read(f->vnum, page_offset, 1, vaddr)) < 0)
+			goto out_err;
+
+		/* Unmap it from vfs */
+		l4_unmap(vaddr, 1, VFS_TID);
+
+		/* Update vm object details */
+		vm_obj->npages++;
+
+		/* Update page details */
+		spin_lock(&page->lock);
+		page->count++;
+		page->owner = vm_obj;
+		page->offset = page_offset;
+		page->virtual = 0;
+
+		/* Add the page to the owner's list of in-memory pages */
+		BUG_ON(!list_empty(&page->list));
+		insert_page_olist(page, vm_obj);
+		spin_unlock(&page->lock);
+	}
+
+	return page;
+
+out_err:
+	l4_unmap(vaddr, 1, VFS_TID);
+	free_page(paddr);
+	return PTR_ERR(err);
+}
+
 struct page *file_page_in(struct vm_object *vm_obj, unsigned long page_offset)
 {
 	struct vm_file *f = vm_object_to_file(vm_obj);
 	struct page *page;
+	void *vaddr, *paddr;
 	int err;
 
 	/* The page is not resident in the page cache. */
-	if (!(page = find_page(vm_obj, page_offset)))
+	if (!(page = find_page(vm_obj, page_offset))) {
 		/* Allocate a new page */
-		void *paddr = alloc_page(1);
-		void *vaddr = phys_to_virt(paddr);
+		paddr = alloc_page(1);
+		vaddr = phys_to_virt(paddr);
 		page = phys_to_page(paddr);
 
 		/* Map the page to vfs task */
@@ -53,13 +125,15 @@ out_err:
 /*
  * This reads-in a range of pages from a file and populates the page cache
  * just like a page fault, but it's not in the page fault path.
  */
-int read_file_pages(struct vm_file *vmfile, unsigned long pfn_start,
+int read_file_pages(struct vm_file *f, unsigned long pfn_start,
 		    unsigned long pfn_end)
 {
-	struct page *page;
+	struct page *p;
 
 	for (int f_offset = pfn_start; f_offset < pfn_end; f_offset++)
-		vmfile->vm_obj->pager.ops->page_in(vmfile->vm_obj, f_offset);
+		if (IS_ERR(p = f->vm_obj.pager->ops.page_in(&f->vm_obj,
+							    f_offset)))
+			return (int)p;
 
 	return 0;
 }
@@ -68,15 +142,42 @@ int read_file_pages(struct vm_file *f, unsigned long pfn_start,
 /*
  * All non-mmapable char devices are handled by this.
  * VFS calls those devices to read their pages.
  */
-struct vm_pager file_pager {
-	.page_in = file_page_in,
+struct vm_pager file_pager = {
+	.ops = {
+		.page_in = file_page_in,
+	},
 };
 
+/* A proposal for a shadow vma container; could be part of vm_file->priv_data */
+struct vm_swap_node {
+	struct vm_file *swap_file;
+	struct task_ids task_ids;
+	struct address_pool *pool;
+};
+
+/*
+ * This should save swap_node/page information either in the pte or in a global
+ * list of swap descriptors, and then write the page into the possibly one and
+ * only swap file.
+ */
+struct page *swap_page_in(struct vm_object *vm_obj, unsigned long file_offset)
+{
+	/* Not implemented yet */
+	BUG();
+	return 0;
+}
+
+struct vm_pager swap_pager = {
+	.ops = {
+		.page_in = swap_page_in,
+	},
+};
 /* Returns the page with given offset in this vm_object */
 struct page *bootfile_page_in(struct vm_object *vm_obj,
 			      unsigned long pfn_offset)
 {
-	struct vm_file *boot_file = vm_obj_to_file(vm_obj);
+	struct vm_file *boot_file = vm_object_to_file(vm_obj);
 	struct svc_image *img = boot_file->priv_data;
 	struct page *page = phys_to_page(img->phys_start +
 					 __pfn_to_addr(pfn_offset));
 
@@ -88,24 +189,21 @@ struct page *bootfile_page_in(struct vm_object *vm_obj,
 	return page;
 }
 
-struct vm_pager bootfile_pager {
-	.page_in = bootfile_page_in,
+struct vm_pager bootfile_pager = {
+	.ops = {
+		.page_in = bootfile_page_in,
+	},
 };
 
-LIST_HEAD(&boot_file_list);
+LIST_HEAD(boot_file_list);
 
 /* From bare boot images, create mappable device files */
 int init_boot_files(struct initdata *initdata)
 {
-	struct svc_image *img;
-	unsigned int sp, pc;
-	struct tcb *task;
-	struct task_ids ids;
-	struct bootdesc *bd;
+	struct bootdesc *bd = initdata->bootdesc;
 	struct vm_file *boot_file;
-	int err;
+	struct svc_image *img;
 
-	bd = initdata->bootdesc;
 	INIT_LIST_HEAD(&initdata->boot_file_list);
 
 	for (int i = 0; i < bd->total_images; i++) {
@@ -113,11 +211,11 @@ int init_boot_files(struct initdata *initdata)
 		boot_file = vm_file_alloc_init();
 		boot_file->priv_data = img;
 		boot_file->length = img->phys_end - img->phys_start;
-		boot_file->pager = &bootfile_pager;
 		boot_file->type = VM_FILE_BOOTFILE;
 
+		/* Initialise the vm object */
+		boot_file->vm_obj.type = VM_OBJ_FILE;
+		boot_file->vm_obj.pager = &bootfile_pager;
+
 		/* Add the object to global vm_object list */
 		list_add(&boot_file->vm_obj.list, &vm_object_list);
@@ -125,25 +223,31 @@ int init_boot_files(struct initdata *initdata)
 		/* Add the file to initdata's bootfile list */
 		list_add(&boot_file->list, &initdata->boot_file_list);
 	}
+
 	return 0;
 }
 
 /* Returns the page with given offset in this vm_object */
 struct page *devzero_page_in(struct vm_object *vm_obj,
 			     unsigned long page_offset)
 {
-	struct vm_file *devzero = vm_obj_to_file(vm_obj);
+	struct vm_file *devzero = vm_object_to_file(vm_obj);
 	struct page *zpage = devzero->priv_data;
 
 	BUG_ON(!(devzero->type & VM_FILE_DEVZERO));
 
 	/* Update zero page struct. */
-	spin_lock(&page->lock);
+	spin_lock(&zpage->lock);
 	BUG_ON(zpage->count < 0);
 	zpage->count++;
-	spin_unlock(&page->lock);
+	spin_unlock(&zpage->lock);
 
 	return zpage;
 }
 
-struct vm_pager devzero_pager {
-	.page_in = devzero_page_in,
+struct vm_pager devzero_pager = {
+	.ops = {
+		.page_in = devzero_page_in,
+	},
 };
 struct vm_file *get_devzero(void)
 
@@ -173,7 +277,7 @@ int init_devzero(void)
 	/* Allocate and initialise devzero file */
 	devzero = vmfile_alloc_init();
 	devzero->vm_obj.npages = ~0;
-	devzero->vm_obj.pager = devzero_pager;
+	devzero->vm_obj.pager = &devzero_pager;
 	devzero->vm_obj.type = VM_OBJ_FILE;
 	devzero->type = VM_FILE_DEVZERO;
 	devzero->priv_data = zpage;
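All of these pagers plug into the same vm_object seam, so callers never special-case the backing store. As a usage illustration, the zero page for an anonymous mapping could be pulled as below (anon_zero_page is a hypothetical name; get_devzero is defined in this file):

	/* Hypothetical: fetch the shared zero page for an anonymous mapping. */
	struct page *anon_zero_page(unsigned long page_offset)
	{
		struct vm_file *devzero = get_devzero();

		/* devzero_page_in bumps the zero page's refcount and returns it. */
		return devzero->vm_obj.pager->ops.page_in(&devzero->vm_obj, page_offset);
	}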

View File

@@ -88,7 +88,7 @@ static int do_shmat(struct shm_descriptor *shm, void *shm_addr, int shmflg,
 	 * per segment and it's the same for all the system tasks.
 	 */
 	if ((err = do_mmap(0, 0, task, (unsigned long)shm_addr,
-			   VM_READ | VM_WRITE | VMA_ANON | VMA_SHARED,
+			   VM_READ | VM_WRITE | VMA_ANONYMOUS | VMA_SHARED,
 			   shm->size)) < 0) {
 		printf("do_mmap: Mapping shm area failed with %d.\n", err);
 		BUG();

View File

@@ -230,6 +230,10 @@ int start_boot_task(struct vm_file *file, struct task_ids *ids)
 	task->text_start = task->data_start;
 	task->text_end = task->data_end;
 
+	/* Task's region available for mmap */
+	task->map_start = task->data_end;
+	task->map_end = task->stack_start;
+
 	/* Set up task's registers */
 	sp = align(task->stack_end - 1, 8);
 	pc = task->text_start;

View File

@@ -30,7 +30,7 @@ struct vm_object *vm_object_alloc_init(void)
 	struct vm_object *obj;
 
 	if (!(obj = kzalloc(sizeof(*obj))))
-		return PTR_ERR(-ENOMEM);
+		return 0;
 
 	return vm_object_init(obj);
 }
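With this fix the allocator consistently returns a null pointer on failure instead of a pointer-encoded errno, matching how its callers in this commit already test it; vma_create_shadow, for instance, relies on exactly this pattern:

	/* Caller-side pattern enabled by the fix (from vma_create_shadow) */
	if (!(vmo = vm_object_alloc_init())) {
		kfree(vmo_link);
		return 0;
	}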