From 58b833dd7fb5a605aa9761578b9073944d991fb7 Mon Sep 17 00:00:00 2001 From: Bahadir Balban Date: Mon, 3 Mar 2008 22:05:01 +0000 Subject: [PATCH] Forks and COW situations show that we need vm objects rather than vm_files. This is the first commit towards implementing vm object based paging with right COW methods. --- TODO | 52 ++++++++ tasks/libl4/include/l4lib/ipcdefs.h | 1 + tasks/libposix/open.c | 4 +- tasks/mm0/include/proc.h | 12 +- tasks/mm0/include/task.h | 14 +-- tasks/mm0/include/vm_area.h | 67 ++++++---- tasks/mm0/src/file.c | 21 ---- tasks/mm0/src/init.c | 2 - tasks/mm0/src/proc.c | 189 ++++++++++++++++++---------- tasks/mm0/src/task.c | 20 ++- tasks/mm0/src/vm_object.c | 29 +++++ 11 files changed, 272 insertions(+), 139 deletions(-) create mode 100644 tasks/mm0/src/vm_object.c diff --git a/TODO b/TODO index c0032be..61d0839 100644 --- a/TODO +++ b/TODO @@ -400,6 +400,58 @@ Current todo: ============== - Use shmat/shmget/shmdt to map block device areas to FS0 and start implementing the VFS. + + + +todo: + +- Generate 4 vmfiles: +- env, stack, data, bss. + +- Fill in env as a private file. + As faults occur on env, simply map file to process. + +- Create an empty data, bss and stack file. + As faults occur on real data, copy on write onto proc->data file, by creating shadows. + As faults occur on devzero, copy on write onto proc->stack file, by creating shadows. + As faults occur on bss, copy on write onto proc->bss file, by creating shadows. + + FORK: + If a fork occurs, copy all vmas into new task. + Find all RW and VM_PRIVATE regions. All RW shadows are eligible. + Create a fork file for each RW/VM_PRIVATE region. E.g. + task->fork->data + task->fork->stack + task->fork->bss + + All RW/PRIVATE shadows become RO, with task->fork owners, rather than their original + owners e.g. proc->data, proc->stack etc. All pages under shadow are moved onto those files. + + Increase file refcount for forker tasks. + As faults occur on fork->stack/bss/data, copy on write onto proc->stack/bss/data, by making + shadows RW again and copying those faulted pages from fork files onto the proc->x files. + + + + + + + + + + + + + + + + + + + + + + diff --git a/tasks/libl4/include/l4lib/ipcdefs.h b/tasks/libl4/include/l4lib/ipcdefs.h index 2c1abf4..4997bb0 100644 --- a/tasks/libl4/include/l4lib/ipcdefs.h +++ b/tasks/libl4/include/l4lib/ipcdefs.h @@ -42,6 +42,7 @@ #define L4_IPC_TAG_MKDIR 19 #define L4_IPC_TAG_MMAP2 20 #define L4_IPC_TAG_CHDIR 21 +#define L4_IPC_TAG_FORK 22 /* Tags for ipc between fs0 and mm0 */ #define L4_IPC_TAG_TASKDATA 25 diff --git a/tasks/libposix/open.c b/tasks/libposix/open.c index 82a540d..38367b8 100644 --- a/tasks/libposix/open.c +++ b/tasks/libposix/open.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include INC_GLUE(memory.h) /* * Arguments that are too large to fit in message registers are @@ -30,7 +32,7 @@ static inline int l4_open(const char *pathname, int flags, mode_t mode) int fd; // write_mr(L4SYS_ARG0, (unsigned long)pathname); - copy_to_utcb(pathname, strlen(pathname)); + copy_to_utcb((void *)pathname, strlen(pathname)); write_mr(L4SYS_ARG1, flags); write_mr(L4SYS_ARG2, (u32)mode); diff --git a/tasks/mm0/include/proc.h b/tasks/mm0/include/proc.h index 9c8a2db..1c8d159 100644 --- a/tasks/mm0/include/proc.h +++ b/tasks/mm0/include/proc.h @@ -1,13 +1,15 @@ #ifndef __MM0_PROC__ #define __MM0_PROC__ +#include + struct proc_files { - struct vm_file *stackfile; /* ZI, private, devzero, then autogenerated */ - struct vm_file *envfile; /* NON-ZI, private, autogenerated, then autogenerated */ - struct vm_file *datafile; /* NON-ZI, private, real file, then autogenerated */ - struct vm_file *bssfile; /* ZI private, devzero, then autogenerated */ + struct vm_object *stack_file; /* ZI, RO: devzero, RW: private */ + struct vm_object *env_file; /* NON-ZI, RO: private, RW: private */ + struct vm_object *data_file; /* NON-ZI, RO: shared, RW: private */ + struct vm_object *bss_file; /* ZI, RO: devzero, RW: private */ }; -int task_prepare_procfiles(struct tcb *t); +int task_setup_vm_objects(struct tcb *t); #endif diff --git a/tasks/mm0/include/task.h b/tasks/mm0/include/task.h index aa62254..e165389 100644 --- a/tasks/mm0/include/task.h +++ b/tasks/mm0/include/task.h @@ -1,7 +1,7 @@ /* * Thread control block. * - * Copyright (C) 2007 Bahadir Balban + * Copyright (C) 2007, 2008 Bahadir Balban */ #ifndef __TASK_H__ #define __TASK_H__ @@ -18,9 +18,6 @@ #define TASK_OFILES_MAX 32 -/* Allow per-task anonymous memory to grow as much as 1 MB for now. */ -#define TASK_SWAPFILE_MAXSIZE SZ_1MB - struct vm_file; struct file_descriptor { @@ -78,12 +75,6 @@ struct tcb { /* File descriptors for this task */ struct file_descriptor fd[TASK_OFILES_MAX]; - - /* Per-task swap file for now */ - struct vm_file *swap_file; - - /* Pool to generate swap file offsets for fileless anonymous regions */ - struct address_pool swap_file_offset_pool; }; struct tcb *find_task(int tid); @@ -95,7 +86,4 @@ void dump_tasks(void); void send_task_data(l4id_t requester); -/* Used by servers that have a reference to tcbs (e.g. a pager) */ -#define current ((struct ktcb *)__L4_ARM_Utcb()->usr_handle) - #endif /* __TASK_H__ */ diff --git a/tasks/mm0/include/vm_area.h b/tasks/mm0/include/vm_area.h index 4348e94..e4ee8a9 100644 --- a/tasks/mm0/include/vm_area.h +++ b/tasks/mm0/include/vm_area.h @@ -28,15 +28,16 @@ #define VMA_ANON (1 << 4) /* Private copy of a file VMA, can be ZI */ #define VMA_COW (1 << 5) -/* This marks shadow vmas */ -#define VMA_SHADOW (1 << 6) + +/* VMA object type flags */ +#define VMOBJ_SHADOW (1 << 6) struct page { int count; /* Refcount */ struct spinlock lock; /* Page lock. */ - struct list_head list; /* For list of a file's in-memory pages */ + struct list_head list; /* For list of a vm_object's in-memory pages */ + struct vm_object *owner;/* The vm_object the page belongs to */ unsigned long virtual; /* If refs >1, first mapper's virtual address */ - struct vm_file *owner; /* The file it belongs to */ unsigned int flags; /* Flags associated with the page. */ unsigned long f_offset; /* The offset page resides in its owner */ }; @@ -59,8 +60,8 @@ struct fault_data { }; struct vm_pager_ops { - int (*read_page)(struct vm_file *f, unsigned long f_offset, void *pagebuf); - int (*write_page)(struct vm_file *f, unsigned long f_offset, void *pagebuf); + int (*page_in)(struct vm_object *vm_obj, unsigned long f_offset); + int (*page_out)(struct vm_object *vm_obj, unsigned long f_offset); }; /* Describes the pager task that handles a vm_area. */ @@ -69,35 +70,57 @@ struct vm_pager { }; /* - * Describes the in-memory representation of a file. This could - * point at a file or another resource, e.g. a device area or swapper space. + * Describes the in-memory representation of a resource. This could + * point at a file or another resource, e.g. a device area, swapper space, + * the anonymous internal state of a process, etc. This covers more than + * just files, e.g. during a fork, captures the state of internal shared + * copy of private pages for a process, which is really not a file. */ -struct vm_file { - int refcnt; - unsigned long vnum; /* Vnode number */ - unsigned long length; - struct list_head list; /* List of all vm files in memory */ - - /* This is the cache of physical pages that this file has in memory. */ - struct list_head page_cache_list; - struct vm_pager *pager; +struct vm_object { + int npages; /* Number of pages in memory */ + int vma_refcnt; /* Number of vmas that refer */ + int shadow_refcnt; /* Number of shadows that refer */ + struct list_head shadows; /* List of vm objects that shadow this one */ + struct vm_object *orig_vma; /* Original object that this one shadows */ + unsigned int type; /* Defines the type of the object */ + struct list_head list; /* List of all vm objects in memory */ + struct list_head page_cache;/* List of in-memory pages */ + struct vm_pager *pager; /* The pager for this object */ + union private_data { /* Private data about the object */ + struct vm_file *file; /* VFS file-specific information */ + } priv; }; +/* In memory representation of a vfs file. */ +struct vm_file { + unsigned long vnum; + unsigned long length; +}; + +/* To create per-vma vm_object lists */ +struct vma_obj_list { + struct list_head list; + struct vm_object *obj; +} /* * Describes a virtually contiguous chunk of memory region in a task. It covers * a unique virtual address area within its task, meaning that it does not * overlap with other regions in the same task. The region could be backed by a * file or various other resources. This is managed by the region's pager. + * + * COW: Upon copy-on-write, each copy-on-write instance creates a shadow of the + * original vma which supersedes the original vma with its copied modified pages. + * This creates a stack of shadow vmas, where the top vma's copy of pages + * supersede the ones lower in the stack. */ struct vm_area { - struct list_head list; /* Vma list */ - struct list_head shadow_list; /* Head for shadow list. See fault.c */ + struct list_head list; /* Per-task vma list */ + struct list_head vm_obj_list; /* Head for vm_object list. */ unsigned long pfn_start; /* Region start virtual pfn */ unsigned long pfn_end; /* Region end virtual pfn, exclusive */ unsigned long flags; /* Protection flags. */ unsigned long f_offset; /* File offset in pfns */ - struct vm_file *owner; /* File that backs the area. */ }; static inline struct vm_area *find_vma(unsigned long addr, @@ -112,8 +135,8 @@ static inline struct vm_area *find_vma(unsigned long addr, return 0; } -/* Adds a page to its vmfile's page cache in order of offset. */ -int insert_page_olist(struct page *this, struct vm_file *f); +/* Adds a page to its vm_objects's page cache in order of offset. */ +int insert_page_olist(struct page *this, struct vm_object *vm_obj); /* Pagers */ extern struct vm_pager default_file_pager; diff --git a/tasks/mm0/src/file.c b/tasks/mm0/src/file.c index 1e81b4b..682961f 100644 --- a/tasks/mm0/src/file.c +++ b/tasks/mm0/src/file.c @@ -14,27 +14,6 @@ #include #include -/* Global list of in-memory vm files. */ -struct list_head vm_file_list; - -/* Allocate and initialise a vmfile, and return it */ -struct vm_file *vmfile_alloc_init(void) -{ - struct vm_file *file; - - if (!(file = kzalloc(sizeof(*file)))) - return PTR_ERR(-ENOMEM); - - INIT_LIST_HEAD(&file->list); - INIT_LIST_HEAD(&file->page_cache_list); - - return file; -} - -void vmfile_init(void) -{ - INIT_LIST_HEAD(&vm_file_list); -} int vfs_read(unsigned long vnum, unsigned long f_offset, unsigned long npages, void *pagebuf) diff --git a/tasks/mm0/src/init.c b/tasks/mm0/src/init.c index 0516b69..f868b3f 100644 --- a/tasks/mm0/src/init.c +++ b/tasks/mm0/src/init.c @@ -62,8 +62,6 @@ void init_mm(struct initdata *initdata) init_utcb(); printf("%s: Initialised own utcb.\n", __TASKNAME__); - vmfile_init(); - /* Give the kernel some memory to use for its allocators */ l4_kmem_grant(__pfn(alloc_page(__pfn(SZ_1MB))), __pfn(SZ_1MB)); } diff --git a/tasks/mm0/src/proc.c b/tasks/mm0/src/proc.c index 72a6033..d3cfb2e 100644 --- a/tasks/mm0/src/proc.c +++ b/tasks/mm0/src/proc.c @@ -1,15 +1,6 @@ /* - * This implements a per-process virtual private file - * server to store environment variables. - * - * Using a per-process private file for the environment - * gives the impression as if a file-backed env/arg area - * is mapped on every process. By this means the env/arg - * pages dont need special processing and are abstracted - * away as files. Same idea can be applied to other - * private regions of a process such as the stack, so - * that debuggers can use file-based process inspection - * methods. + * Anonymous files for the process (e.g. stack, data, env) + * are implemented here. * * Copyright (C) 2008 Bahadir Balban */ @@ -24,80 +15,150 @@ #include #include -struct envdata { - struct list_head list; - void *env_data; - int env_size; - int id; -}; -LIST_HEAD(env_list); +static void *zpage_p; +static struct page *zpage; -/* Copies environment data into provided page. */ -int task_env_pager_read_page(struct vm_file *f, unsigned long f_off_pfn, - void *dest_page) +static struct vm_object devzero; + +void init_zero_page(void) { - struct envdata *env; + void *zpage_v; + zpage_p = alloc_page(1); + zpage = phys_to_page(zpage_p); - list_for_each_entry(env, &env_list, list) - if (env->id == f->vnum) - goto copyenv; + /* Map it to self */ + zpage_v = l4_map_helper(zpage_p, 1); - printf("%s: No such env id: %d, to copy environment for.\n", - __TASKNAME__, f->vnum); - return -EINVAL; + /* Zero it */ + memset(zpage_v, 0, PAGE_SIZE); -copyenv: - if (f_off_pfn != 0) { - printf("%s: Environments currently have a single page.\n"); - return -EINVAL; - } + /* Unmap it */ + l4_unmap_helper(zpage_v, 1); - memset(dest_page, 0, PAGE_SIZE); - BUG_ON(env->env_size > PAGE_SIZE); - memcpy(dest_page, env->env_data, env->env_size); + /* Update page struct. All other fields are zero */ + zpage->count++; +} + +#define VM_OBJ_MASK 0xFFFF +#define VM_OBJ_DEVZERO (1 << 0) /* Devzero special file */ +#define VM_OBJ_FILE (1 << 1) /* Regular VFS file */ +#define VM_OBJ_SHADOW (1 << 2) /* Shadow of another object */ + +/* Returns the page with given offset in this vm_object */ +struct page *devzero_pager_page_in(struct vm_object *vm_obj, unsigned long f_offset) +{ + return zpage; +} + +struct vm_pager devzero_pager { + page_in = devzero_pager_page_int, +}; + +void init_devzero(void) +{ + init_zero_page(); + + INIT_LIST_HEAD(&devzero.page_cache); + INIT_LIST_HEAD(&devzero.list); + INIT_LIST_HEAD(&devzero.shadows); + + /* Devzero has infinitely many pages ;-) */ + devzero.npages = -1; + devzero.type = VM_OBJ_FILE; + devzero.pager = &devzero_pager; +} + +struct vm_file *get_devzero(void) +{ + return &devzero; +} + +void *get_zero_page(void) +{ + zpage->count++; + return zpage_p; +} + +void put_zero_page(void) +{ + zpage->count--; + BUG_ON(zpage->count < 0); +} + +/* Allocates and fills in the env page. This is like a pre-faulted file. */ +int task_populate_env(struct task *task) +{ + void *paddr = alloc_page(1); + void *vaddr = phys_to_virt(paddr); + struct page *page = phys_to_page(paddr); + + /* Map new page at a self virtual address temporarily */ + l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid()); + + /* Clear the page */ + memset((void *)vaddr, 0, PAGE_SIZE); + + /* Fill in environment data */ + memcpy((void *)vaddr, &t->utcb_address, sizeof(t->utcb_address)); + + /* Remove temporary mapping */ + l4_unmap((void *)vaddr, 1, self_tid()); + + spin_lock(&page->lock); + + /* Environment file owns this page */ + page->owner = task->proc_files->env_file; + + /* Add the page to it's owner's list of in-memory pages */ + BUG_ON(!list_empty(&page->list)); + insert_page_olist(page, page->owner); + + /* The offset of this page in its owner file */ + page->f_offset = 0; + + page->count++; + page->virtual = 0; + spin_unlock(&page->lock); return 0; } -/* Pager for environment files */ -struct vm_pager task_env_pager = { - .ops = { - .read_page = task_env_pager_read_page, - .write_page= 0, - }, -}; + +#define TASK_DATA_VNUM 1 +#define TASK_STACK_VNUM 2 +#define TASK_ENV_VNUM 3 /* * For a task that is about to execute, this dynamically * generates its environment file, and environment data. */ -int task_prepare_environment(struct tcb *t) +int task_setup_vm_objects(struct tcb *t) { - struct envdata *env; + struct proc_files *pf = &t->proc_files; - /* Allocate a new vmfile for this task's environment */ - if (IS_ERR(t->env_file = vmfile_alloc_init())) + if (IS_ERR(pf->stack_file = vmfile_alloc_init())) + return (int)t->stack_file; + if (IS_ERR(pf->env_file = vmfile_alloc_init())) return (int)t->env_file; + if (IS_ERR(pf->env_file = vmfile_alloc_init())) + return (int)t->data_file; - /* Initialise and add it to global vmfile list */ - - /* - * NOTE: Temporarily we can use tid as the vnum because - * this is the only per-task file. - */ - t->env_file->vnum = t->tid; - t->env_file->length = PAGE_SIZE; - t->env_file->pager = &task_env_pager; + t->env_file->vnum = (t->tid << 16) | TASK_ENV_VNUM; + t->env_file->length = t->env_end - t->env_start; + t->env_file->pager = &task_anon_pager; list_add(&t->env_file->list, &vm_file_list); - /* Allocate, initialise and add per-task env data */ - BUG_ON(!(env = kzalloc(sizeof(struct envdata)))); - INIT_LIST_HEAD(&env->list); - env->env_data = &t->utcb_address; - env->env_size = sizeof(t->utcb_address); - env->id = t->tid; - list_add(&env->list, &env_list); + t->stack_file->vnum = (t->tid << 16) TASK_STACK_VNUM; + t->stack_file->length = t->stack_end - t->stack_start; + t->stack_file->pager = &task_anon_pager; + list_add(&t->stack_file->list, &vm_file_list); - return 0; + t->data_file->vnum = (t->tid << 16) TASK_DATA_VNUM; + t->data_file->length = t->data_end - t->data_start; + t->data_file->pager = &task_anon_pager; + list_add(&t->data_file->list, &vm_file_list); + + /* Allocate, initialise and add per-task env data */ + return task_populate_env(task); } diff --git a/tasks/mm0/src/task.c b/tasks/mm0/src/task.c index 3d36c7c..1a27682 100644 --- a/tasks/mm0/src/task.c +++ b/tasks/mm0/src/task.c @@ -64,11 +64,6 @@ struct tcb *create_init_tcb(struct tcb_head *tcbs) /* Ids will be acquired from the kernel */ task->tid = TASK_ID_INVALID; task->spid = TASK_ID_INVALID; - task->swap_file = kzalloc(sizeof(struct vm_file)); - task->swap_file->pager = &swap_pager; - address_pool_init(&task->swap_file_offset_pool, 0, - __pfn(TASK_SWAPFILE_MAXSIZE)); - INIT_LIST_HEAD(&task->swap_file->page_cache_list); INIT_LIST_HEAD(&task->list); INIT_LIST_HEAD(&task->vm_area_list); list_add_tail(&task->list, &tcbs->list); @@ -144,7 +139,7 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs) * when faulted, simply copies the task env data to the * allocated page. */ - if (task_prepare_environment(task) < 0) { + if (task_prepare_proc_files(task) < 0) { printf("Could not create environment file.\n"); goto error; } @@ -156,8 +151,11 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs) task->stack_end = task->env_start; task->stack_start = task->stack_end - PAGE_SIZE * 4; - /* Only text start is valid */ - task->text_start = USER_AREA_START; + /* Currently RO text and RW data are one region */ + task->data_start = USER_AREA_START; + task->data_end = USER_AREA_START + file->length; + task->text_start = task->data_start; + task->text_end = task->data_end; /* Set up task's registers */ sp = align(task->stack_end - 1, 8); @@ -165,15 +163,15 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs) /* mmap each task's physical image to task's address space. */ if ((err = do_mmap(file, 0, task, USER_AREA_START, - VM_READ | VM_WRITE | VM_EXEC, + VM_READ | VM_WRITE, __pfn(page_align_up(file->length)))) < 0) { printf("do_mmap: failed with %d.\n", err); goto error; } /* mmap each task's environment from its env file. */ - if ((err = do_mmap(task->env_file, 0, task, task->env_start, - VM_READ | VM_WRITE, + if ((err = do_mmap(task->proc_files->env_file, 0, task, + task->env_start, VM_READ | VM_WRITE, __pfn(task->env_end - task->env_start)) < 0)) { printf("do_mmap: Mapping environment failed with %d.\n", err); diff --git a/tasks/mm0/src/vm_object.c b/tasks/mm0/src/vm_object.c new file mode 100644 index 0000000..856aaa8 --- /dev/null +++ b/tasks/mm0/src/vm_object.c @@ -0,0 +1,29 @@ +/* + * VM Objects. + * + * Copyright (C) 2008 Bahadir Balban + */ +#include +#include +#include +#include + + +/* Global list of in-memory vm files. */ +LIST_HEAD(vm_object_list); + +/* Allocate and initialise a vmfile, and return it */ +struct vm_object *vm_object_alloc_init(void) +{ + struct vm_object *obj; + + if (!(obj = kzalloc(sizeof(*obj)))) + return PTR_ERR(-ENOMEM); + + INIT_LIST_HEAD(&obj->list); + INIT_LIST_HEAD(&obj->page_cache); + INIT_LIST_HEAD(&obj->shadows); + + return obj; +} +