Forks and COW situations show that we need vm objects rather than vm_files.

This is the first commit towards implementing vm-object-based paging with
the right COW methods.
This commit is contained in:
Bahadir Balban
2008-03-03 22:05:01 +00:00
parent e2e6c89da2
commit 58b833dd7f
11 changed files with 272 additions and 139 deletions

52
TODO
View File

@@ -400,6 +400,58 @@ Current todo:
==============
- Use shmat/shmget/shmdt to map block device areas to FS0 and start implementing the VFS.
todo:
- Generate 4 vmfiles:
- env, stack, data, bss.
- Fill in env as a private file.
As faults occur on env, simply map file to process.
- Create an empty data, bss and stack file.
As faults occur on real data, copy on write onto proc->data file, by creating shadows.
As faults occur on devzero, copy on write onto proc->stack file, by creating shadows.
As faults occur on bss, copy on write onto proc->bss file, by creating shadows.
FORK:
If a fork occurs, copy all vmas into new task.
Find all RW and VM_PRIVATE regions. All RW shadows are eligible.
Create a fork file for each RW/VM_PRIVATE region. E.g.
task->fork->data
task->fork->stack
task->fork->bss
All RW/PRIVATE shadows become RO, with task->fork owners, rather than their original
owners e.g. proc->data, proc->stack etc. All pages under shadow are moved onto those files.
Increase file refcount for forker tasks.
As faults occur on fork->stack/bss/data, copy on write onto proc->stack/bss/data, by making
shadows RW again and copying those faulted pages from fork files onto the proc->x files.

View File

@@ -42,6 +42,7 @@
#define L4_IPC_TAG_MKDIR 19
#define L4_IPC_TAG_MMAP2 20
#define L4_IPC_TAG_CHDIR 21
#define L4_IPC_TAG_FORK 22
/* Tags for ipc between fs0 and mm0 */
#define L4_IPC_TAG_TASKDATA 25

View File

@@ -13,6 +13,8 @@
#include <l4lib/arch/syslib.h>
#include <l4lib/ipcdefs.h>
#include <fcntl.h>
#include <l4/macros.h>
#include INC_GLUE(memory.h)
/*
* Arguments that are too large to fit in message registers are
@@ -30,7 +32,7 @@ static inline int l4_open(const char *pathname, int flags, mode_t mode)
int fd;
// write_mr(L4SYS_ARG0, (unsigned long)pathname);
copy_to_utcb(pathname, strlen(pathname));
copy_to_utcb((void *)pathname, strlen(pathname));
write_mr(L4SYS_ARG1, flags);
write_mr(L4SYS_ARG2, (u32)mode);

View File

@@ -1,13 +1,15 @@
#ifndef __MM0_PROC__
#define __MM0_PROC__
#include <vm_area.h>
struct proc_files {
struct vm_file *stackfile; /* ZI, private, devzero, then autogenerated */
struct vm_file *envfile; /* NON-ZI, private, autogenerated, then autogenerated */
struct vm_file *datafile; /* NON-ZI, private, real file, then autogenerated */
struct vm_file *bssfile; /* ZI private, devzero, then autogenerated */
struct vm_object *stack_file; /* ZI, RO: devzero, RW: private */
struct vm_object *env_file; /* NON-ZI, RO: private, RW: private */
struct vm_object *data_file; /* NON-ZI, RO: shared, RW: private */
struct vm_object *bss_file; /* ZI, RO: devzero, RW: private */
};
int task_prepare_procfiles(struct tcb *t);
int task_setup_vm_objects(struct tcb *t);
#endif

View File

@@ -1,7 +1,7 @@
/*
* Thread control block.
*
* Copyright (C) 2007 Bahadir Balban
* Copyright (C) 2007, 2008 Bahadir Balban
*/
#ifndef __TASK_H__
#define __TASK_H__
@@ -18,9 +18,6 @@
#define TASK_OFILES_MAX 32
/* Allow per-task anonymous memory to grow as much as 1 MB for now. */
#define TASK_SWAPFILE_MAXSIZE SZ_1MB
struct vm_file;
struct file_descriptor {
@@ -78,12 +75,6 @@ struct tcb {
/* File descriptors for this task */
struct file_descriptor fd[TASK_OFILES_MAX];
/* Per-task swap file for now */
struct vm_file *swap_file;
/* Pool to generate swap file offsets for fileless anonymous regions */
struct address_pool swap_file_offset_pool;
};
struct tcb *find_task(int tid);
@@ -95,7 +86,4 @@ void dump_tasks(void);
void send_task_data(l4id_t requester);
/* Used by servers that have a reference to tcbs (e.g. a pager) */
#define current ((struct ktcb *)__L4_ARM_Utcb()->usr_handle)
#endif /* __TASK_H__ */

View File

@@ -28,15 +28,16 @@
#define VMA_ANON (1 << 4)
/* Private copy of a file VMA, can be ZI */
#define VMA_COW (1 << 5)
/* This marks shadow vmas */
#define VMA_SHADOW (1 << 6)
/* VMA object type flags */
#define VMOBJ_SHADOW (1 << 6)
struct page {
int count; /* Refcount */
struct spinlock lock; /* Page lock. */
struct list_head list; /* For list of a file's in-memory pages */
struct list_head list; /* For list of a vm_object's in-memory pages */
struct vm_object *owner;/* The vm_object the page belongs to */
unsigned long virtual; /* If refs >1, first mapper's virtual address */
struct vm_file *owner; /* The file it belongs to */
unsigned int flags; /* Flags associated with the page. */
unsigned long f_offset; /* The offset page resides in its owner */
};
@@ -59,8 +60,8 @@ struct fault_data {
};
struct vm_pager_ops {
int (*read_page)(struct vm_file *f, unsigned long f_offset, void *pagebuf);
int (*write_page)(struct vm_file *f, unsigned long f_offset, void *pagebuf);
int (*page_in)(struct vm_object *vm_obj, unsigned long f_offset);
int (*page_out)(struct vm_object *vm_obj, unsigned long f_offset);
};
/* Describes the pager task that handles a vm_area. */
@@ -69,35 +70,57 @@ struct vm_pager {
};
/*
* Describes the in-memory representation of a file. This could
* point at a file or another resource, e.g. a device area or swapper space.
* Describes the in-memory representation of a resource. This could
* point at a file or another resource, e.g. a device area, swapper space,
* the anonymous internal state of a process, etc. This covers more than
* just files, e.g. during a fork, captures the state of internal shared
* copy of private pages for a process, which is really not a file.
*/
struct vm_file {
int refcnt;
unsigned long vnum; /* Vnode number */
unsigned long length;
struct list_head list; /* List of all vm files in memory */
/* This is the cache of physical pages that this file has in memory. */
struct list_head page_cache_list;
struct vm_pager *pager;
/*
 * An in-memory resource that vmas can map: a file, a device area, or an
 * anonymous/shadow object created for COW. Shadows supersede the pages of
 * the object they shadow.
 */
struct vm_object {
	int npages;			/* Number of pages in memory */
	int vma_refcnt;			/* Number of vmas that refer */
	int shadow_refcnt;		/* Number of shadows that refer */
	struct list_head shadows;	/* List of vm objects that shadow this one */
	/* NOTE(review): named orig_vma but holds a vm_object — confirm intended name */
	struct vm_object *orig_vma;	/* Original object that this one shadows */
	unsigned int type;		/* Defines the type of the object */
	struct list_head list;		/* List of all vm objects in memory */
	struct list_head page_cache;	/* List of in-memory pages */
	struct vm_pager *pager;		/* The pager for this object */
	union private_data {		/* Private data about the object */
		struct vm_file *file;	/* VFS file-specific information */
	} priv;
};
/* In memory representation of a vfs file. */
struct vm_file {
	unsigned long vnum;	/* Vnode number identifying the file in the VFS */
	unsigned long length;	/* File length in bytes */
};
/*
 * Link element used to build per-vma lists of vm_objects
 * (e.g. the stack of shadow objects over an original object).
 */
struct vma_obj_list {
	struct list_head list;		/* Link in the vma's object list */
	struct vm_object *obj;		/* The object at this list position */
};
/*
* Describes a virtually contiguous chunk of memory region in a task. It covers
* a unique virtual address area within its task, meaning that it does not
* overlap with other regions in the same task. The region could be backed by a
* file or various other resources. This is managed by the region's pager.
*
* COW: Upon copy-on-write, each copy-on-write instance creates a shadow of the
* original vma which supersedes the original vma with its copied modified pages.
* This creates a stack of shadow vmas, where the top vma's copy of pages
* supersede the ones lower in the stack.
*/
struct vm_area {
struct list_head list; /* Vma list */
struct list_head shadow_list; /* Head for shadow list. See fault.c */
struct list_head list; /* Per-task vma list */
struct list_head vm_obj_list; /* Head for vm_object list. */
unsigned long pfn_start; /* Region start virtual pfn */
unsigned long pfn_end; /* Region end virtual pfn, exclusive */
unsigned long flags; /* Protection flags. */
unsigned long f_offset; /* File offset in pfns */
struct vm_file *owner; /* File that backs the area. */
};
static inline struct vm_area *find_vma(unsigned long addr,
@@ -112,8 +135,8 @@ static inline struct vm_area *find_vma(unsigned long addr,
return 0;
}
/* Adds a page to its vmfile's page cache in order of offset. */
int insert_page_olist(struct page *this, struct vm_file *f);
/* Adds a page to its vm_objects's page cache in order of offset. */
int insert_page_olist(struct page *this, struct vm_object *vm_obj);
/* Pagers */
extern struct vm_pager default_file_pager;

View File

@@ -14,27 +14,6 @@
#include <posix/sys/types.h>
#include <string.h>
/* Global list of in-memory vm files. */
struct list_head vm_file_list;
/* Allocate and initialise a vmfile, and return it */
struct vm_file *vmfile_alloc_init(void)
{
struct vm_file *file;
if (!(file = kzalloc(sizeof(*file))))
return PTR_ERR(-ENOMEM);
INIT_LIST_HEAD(&file->list);
INIT_LIST_HEAD(&file->page_cache_list);
return file;
}
void vmfile_init(void)
{
INIT_LIST_HEAD(&vm_file_list);
}
int vfs_read(unsigned long vnum, unsigned long f_offset, unsigned long npages,
void *pagebuf)

View File

@@ -62,8 +62,6 @@ void init_mm(struct initdata *initdata)
init_utcb();
printf("%s: Initialised own utcb.\n", __TASKNAME__);
vmfile_init();
/* Give the kernel some memory to use for its allocators */
l4_kmem_grant(__pfn(alloc_page(__pfn(SZ_1MB))), __pfn(SZ_1MB));
}

View File

@@ -1,15 +1,6 @@
/*
* This implements a per-process virtual private file
* server to store environment variables.
*
* Using a per-process private file for the environment
* gives the impression as if a file-backed env/arg area
* is mapped on every process. By this means the env/arg
* pages dont need special processing and are abstracted
* away as files. Same idea can be applied to other
* private regions of a process such as the stack, so
* that debuggers can use file-based process inspection
* methods.
* Anonymous files for the process (e.g. stack, data, env)
* are implemented here.
*
* Copyright (C) 2008 Bahadir Balban
*/
@@ -24,80 +15,150 @@
#include <task.h>
#include <proc.h>
struct envdata {
struct list_head list;
void *env_data;
int env_size;
int id;
};
LIST_HEAD(env_list);
static void *zpage_p;
static struct page *zpage;
/* Copies environment data into provided page. */
int task_env_pager_read_page(struct vm_file *f, unsigned long f_off_pfn,
void *dest_page)
static struct vm_object devzero;
void init_zero_page(void)
{
struct envdata *env;
void *zpage_v;
zpage_p = alloc_page(1);
zpage = phys_to_page(zpage_p);
list_for_each_entry(env, &env_list, list)
if (env->id == f->vnum)
goto copyenv;
/* Map it to self */
zpage_v = l4_map_helper(zpage_p, 1);
printf("%s: No such env id: %d, to copy environment for.\n",
__TASKNAME__, f->vnum);
return -EINVAL;
/* Zero it */
memset(zpage_v, 0, PAGE_SIZE);
copyenv:
if (f_off_pfn != 0) {
printf("%s: Environments currently have a single page.\n");
return -EINVAL;
}
/* Unmap it */
l4_unmap_helper(zpage_v, 1);
memset(dest_page, 0, PAGE_SIZE);
BUG_ON(env->env_size > PAGE_SIZE);
memcpy(dest_page, env->env_data, env->env_size);
/* Update page struct. All other fields are zero */
zpage->count++;
}
#define VM_OBJ_MASK 0xFFFF
#define VM_OBJ_DEVZERO (1 << 0) /* Devzero special file */
#define VM_OBJ_FILE (1 << 1) /* Regular VFS file */
#define VM_OBJ_SHADOW (1 << 2) /* Shadow of another object */
/*
 * Returns the page with given offset in this vm_object.
 * For devzero every offset maps to the same shared zero page,
 * so both arguments are ignored.
 *
 * NOTE(review): vm_pager_ops declares page_in as returning int, but this
 * returns a struct page * — confirm which signature is final.
 */
struct page *devzero_pager_page_in(struct vm_object *vm_obj, unsigned long f_offset)
{
	return zpage;
}
struct vm_pager devzero_pager {
page_in = devzero_pager_page_int,
};
void init_devzero(void)
{
init_zero_page();
INIT_LIST_HEAD(&devzero.page_cache);
INIT_LIST_HEAD(&devzero.list);
INIT_LIST_HEAD(&devzero.shadows);
/* Devzero has infinitely many pages ;-) */
devzero.npages = -1;
devzero.type = VM_OBJ_FILE;
devzero.pager = &devzero_pager;
}
/* Returns the global devzero object (devzero is a struct vm_object, not a vm_file). */
struct vm_object *get_devzero(void)
{
	return &devzero;
}
/* Hands out the shared zero page's physical address, taking a reference. */
void *get_zero_page(void)
{
	zpage->count += 1;

	return zpage_p;
}
/* Drops a reference to the shared zero page; underflow is a bug. */
void put_zero_page(void)
{
	BUG_ON(--zpage->count < 0);
}
/* Allocates and fills in the env page. This is like a pre-faulted file. */
int task_populate_env(struct task *task)
{
void *paddr = alloc_page(1);
void *vaddr = phys_to_virt(paddr);
struct page *page = phys_to_page(paddr);
/* Map new page at a self virtual address temporarily */
l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
/* Clear the page */
memset((void *)vaddr, 0, PAGE_SIZE);
/* Fill in environment data */
memcpy((void *)vaddr, &t->utcb_address, sizeof(t->utcb_address));
/* Remove temporary mapping */
l4_unmap((void *)vaddr, 1, self_tid());
spin_lock(&page->lock);
/* Environment file owns this page */
page->owner = task->proc_files->env_file;
/* Add the page to it's owner's list of in-memory pages */
BUG_ON(!list_empty(&page->list));
insert_page_olist(page, page->owner);
/* The offset of this page in its owner file */
page->f_offset = 0;
page->count++;
page->virtual = 0;
spin_unlock(&page->lock);
return 0;
}
/* Pager for environment files */
struct vm_pager task_env_pager = {
.ops = {
.read_page = task_env_pager_read_page,
.write_page= 0,
},
};
#define TASK_DATA_VNUM 1
#define TASK_STACK_VNUM 2
#define TASK_ENV_VNUM 3
/*
* For a task that is about to execute, this dynamically
* generates its environment file, and environment data.
*/
int task_prepare_environment(struct tcb *t)
int task_setup_vm_objects(struct tcb *t)
{
struct envdata *env;
struct proc_files *pf = &t->proc_files;
/* Allocate a new vmfile for this task's environment */
if (IS_ERR(t->env_file = vmfile_alloc_init()))
if (IS_ERR(pf->stack_file = vmfile_alloc_init()))
return (int)t->stack_file;
if (IS_ERR(pf->env_file = vmfile_alloc_init()))
return (int)t->env_file;
if (IS_ERR(pf->env_file = vmfile_alloc_init()))
return (int)t->data_file;
/* Initialise and add it to global vmfile list */
/*
* NOTE: Temporarily we can use tid as the vnum because
* this is the only per-task file.
*/
t->env_file->vnum = t->tid;
t->env_file->length = PAGE_SIZE;
t->env_file->pager = &task_env_pager;
t->env_file->vnum = (t->tid << 16) | TASK_ENV_VNUM;
t->env_file->length = t->env_end - t->env_start;
t->env_file->pager = &task_anon_pager;
list_add(&t->env_file->list, &vm_file_list);
/* Allocate, initialise and add per-task env data */
BUG_ON(!(env = kzalloc(sizeof(struct envdata))));
INIT_LIST_HEAD(&env->list);
env->env_data = &t->utcb_address;
env->env_size = sizeof(t->utcb_address);
env->id = t->tid;
list_add(&env->list, &env_list);
t->stack_file->vnum = (t->tid << 16) TASK_STACK_VNUM;
t->stack_file->length = t->stack_end - t->stack_start;
t->stack_file->pager = &task_anon_pager;
list_add(&t->stack_file->list, &vm_file_list);
return 0;
t->data_file->vnum = (t->tid << 16) TASK_DATA_VNUM;
t->data_file->length = t->data_end - t->data_start;
t->data_file->pager = &task_anon_pager;
list_add(&t->data_file->list, &vm_file_list);
/* Allocate, initialise and add per-task env data */
return task_populate_env(task);
}

View File

@@ -64,11 +64,6 @@ struct tcb *create_init_tcb(struct tcb_head *tcbs)
/* Ids will be acquired from the kernel */
task->tid = TASK_ID_INVALID;
task->spid = TASK_ID_INVALID;
task->swap_file = kzalloc(sizeof(struct vm_file));
task->swap_file->pager = &swap_pager;
address_pool_init(&task->swap_file_offset_pool, 0,
__pfn(TASK_SWAPFILE_MAXSIZE));
INIT_LIST_HEAD(&task->swap_file->page_cache_list);
INIT_LIST_HEAD(&task->list);
INIT_LIST_HEAD(&task->vm_area_list);
list_add_tail(&task->list, &tcbs->list);
@@ -144,7 +139,7 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs)
* when faulted, simply copies the task env data to the
* allocated page.
*/
if (task_prepare_environment(task) < 0) {
if (task_prepare_proc_files(task) < 0) {
printf("Could not create environment file.\n");
goto error;
}
@@ -156,8 +151,11 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs)
task->stack_end = task->env_start;
task->stack_start = task->stack_end - PAGE_SIZE * 4;
/* Only text start is valid */
task->text_start = USER_AREA_START;
/* Currently RO text and RW data are one region */
task->data_start = USER_AREA_START;
task->data_end = USER_AREA_START + file->length;
task->text_start = task->data_start;
task->text_end = task->data_end;
/* Set up task's registers */
sp = align(task->stack_end - 1, 8);
@@ -165,15 +163,15 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs)
/* mmap each task's physical image to task's address space. */
if ((err = do_mmap(file, 0, task, USER_AREA_START,
VM_READ | VM_WRITE | VM_EXEC,
VM_READ | VM_WRITE,
__pfn(page_align_up(file->length)))) < 0) {
printf("do_mmap: failed with %d.\n", err);
goto error;
}
/* mmap each task's environment from its env file. */
if ((err = do_mmap(task->env_file, 0, task, task->env_start,
VM_READ | VM_WRITE,
if ((err = do_mmap(task->proc_files->env_file, 0, task,
task->env_start, VM_READ | VM_WRITE,
__pfn(task->env_end - task->env_start)) < 0)) {
printf("do_mmap: Mapping environment failed with %d.\n",
err);

29
tasks/mm0/src/vm_object.c Normal file
View File

@@ -0,0 +1,29 @@
/*
* VM Objects.
*
* Copyright (C) 2008 Bahadir Balban
*/
#include <vm_area.h>
#include <l4/macros.h>
#include <l4/api/errno.h>
#include <kmalloc/kmalloc.h>
/* Global list of in-memory vm objects. */
LIST_HEAD(vm_object_list);
/*
 * Allocates a zero-initialised vm object with empty list heads.
 * Returns the object, or an error pointer on allocation failure.
 */
struct vm_object *vm_object_alloc_init(void)
{
	struct vm_object *obj = kzalloc(sizeof(*obj));

	if (!obj)
		return PTR_ERR(-ENOMEM);

	INIT_LIST_HEAD(&obj->list);
	INIT_LIST_HEAD(&obj->page_cache);
	INIT_LIST_HEAD(&obj->shadows);

	return obj;
}