From d67d6b84a958733e91ef850bcaabbb728ea7bcd2 Mon Sep 17 00:00:00 2001
From: Bahadir Balban
Date: Mon, 18 Feb 2008 22:26:39 +0000
Subject: [PATCH] Wiring between mm0 page cache and vfs is now almost what it
 should look like.

This implements the infrastructure for the read/write system calls:
file content is first looked up in mm0's page cache, and is then read
in or written out via the vfs read/write functions.

	modified:   tasks/fs0/src/syscalls.c
	modified:   tasks/mm0/include/lib/bit.h
	modified:   tasks/mm0/include/lib/idpool.h
	modified:   tasks/mm0/include/task.h
	modified:   tasks/mm0/include/vm_area.h
	modified:   tasks/mm0/main.c
	modified:   tasks/mm0/src/devzero.c
	modified:   tasks/mm0/src/fault.c
	new file:   tasks/mm0/src/file.c
	modified:   tasks/mm0/src/init.c
	modified:   tasks/mm0/src/lib/bit.c
	modified:   tasks/mm0/src/lib/idpool.c
	modified:   tasks/mm0/src/task.c
---
 tasks/fs0/src/syscalls.c       |    5 +-
 tasks/mm0/include/lib/bit.h    |    1 +
 tasks/mm0/include/lib/idpool.h |    3 +
 tasks/mm0/include/task.h       |   13 +-
 tasks/mm0/include/vm_area.h    |   18 ++-
 tasks/mm0/main.c               |   15 +++
 tasks/mm0/src/devzero.c        |    2 +-
 tasks/mm0/src/fault.c          |   91 ++++++++++----
 tasks/mm0/src/file.c           |  222 +++++++++++++++++++++++++++++++++
 tasks/mm0/src/init.c           |    4 +
 tasks/mm0/src/lib/bit.c        |   12 ++
 tasks/mm0/src/lib/idpool.c     |   15 +++
 tasks/mm0/src/task.c           |   21 +---
 13 files changed, 365 insertions(+), 57 deletions(-)
 create mode 100644 tasks/mm0/src/file.c

diff --git a/tasks/fs0/src/syscalls.c b/tasks/fs0/src/syscalls.c
index 1e3c380..92b7acd 100644
--- a/tasks/fs0/src/syscalls.c
+++ b/tasks/fs0/src/syscalls.c
@@ -25,13 +25,14 @@
  * for handling syscalls that access file content (i.e. read/write) since
  * it maintains the page cache.
  */
-int send_pager_sys_open(l4id_t sender, int fd, unsigned long vnum)
+int send_pager_sys_open(l4id_t sender, int fd, unsigned long vnum, unsigned long size)
 {
 	int err;
 
 	write_mr(L4SYS_ARG0, sender);
 	write_mr(L4SYS_ARG1, fd);
 	write_mr(L4SYS_ARG2, vnum);
+	write_mr(L4SYS_ARG3, size);
 
 	if ((err = l4_send(PAGER_TID, L4_IPC_TAG_PAGER_SYSOPEN)) < 0) {
 		printf("%s: L4 IPC Error: %d.\n", __FUNCTION__, err);
@@ -104,7 +105,7 @@ int sys_open(l4id_t sender, const char *pathname, int flags, unsigned int mode)
 	t->fd[fd] = v->vnum;
 
 	/* Tell the pager about opened vnode information */
-	BUG_ON(send_pager_sys_open(sender, fd, v->vnum) < 0);
+	BUG_ON(send_pager_sys_open(sender, fd, v->vnum, v->size) < 0);
 
 	return 0;
 }
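The open notification from fs0 to mm0 now carries four words. For reference,
this is the layout of the L4_IPC_TAG_PAGER_SYSOPEN message as written by
send_pager_sys_open() above and consumed by mm0's handle_requests() further
down; the struct is only an illustrative summary and does not exist in the tree.

/*
 * Illustrative view of the L4_IPC_TAG_PAGER_SYSOPEN payload
 * (hypothetical struct; mm0 reads the message registers directly):
 *
 *	L4SYS_ARG0 - task id of the opener
 *	L4SYS_ARG1 - file descriptor slot in the opener
 *	L4SYS_ARG2 - vnode number (vnum) of the opened file
 *	L4SYS_ARG3 - file size in bytes (new in this patch)
 */
struct pager_sys_open_msg {
	l4id_t opener;
	int fd;
	unsigned long vnum;
	unsigned long size;
};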
diff --git a/tasks/mm0/include/lib/bit.h b/tasks/mm0/include/lib/bit.h
index bccc5df..ab58671 100644
--- a/tasks/mm0/include/lib/bit.h
+++ b/tasks/mm0/include/lib/bit.h
@@ -9,6 +9,7 @@ int find_and_set_first_free_contig_bits(u32 *word, unsigned int limit,
 					int nbits);
 int check_and_clear_bit(u32 *word, int bit);
 int check_and_clear_contig_bits(u32 *word, int first, int nbits);
+int check_and_set_bit(u32 *word, int bit);
 
 /* Set */
diff --git a/tasks/mm0/include/lib/idpool.h b/tasks/mm0/include/lib/idpool.h
index 3400849..fda7268 100644
--- a/tasks/mm0/include/lib/idpool.h
+++ b/tasks/mm0/include/lib/idpool.h
@@ -2,8 +2,10 @@
 #define __MM0_IDPOOL_H__
 
 #include
+#include
 
 struct id_pool {
+	struct spinlock lock;
 	int nwords;
 	u32 bitmap[];
 };
@@ -11,6 +13,7 @@ struct id_pool {
 struct id_pool *id_pool_new_init(int mapsize);
 int id_new(struct id_pool *pool);
 int id_del(struct id_pool *pool, int id);
+int id_get(struct id_pool *pool, int id);
 int ids_new_contiguous(struct id_pool *pool, int numids);
 int ids_del_contiguous(struct id_pool *pool, int first, int numids);
 
diff --git a/tasks/mm0/include/task.h b/tasks/mm0/include/task.h
index 83c3fd5..6fd0440 100644
--- a/tasks/mm0/include/task.h
+++ b/tasks/mm0/include/task.h
@@ -13,13 +13,21 @@
 #include
 #include
 
-#define __TASKNAME__		"mm0"
+#define __TASKNAME__		__PAGERNAME__
+
+#define TASK_OFILES_MAX		32
 
 /* Allow per-task anonymous memory to grow as much as 1 MB for now. */
 #define TASK_SWAPFILE_MAXSIZE	SZ_1MB
 
 struct vm_file;
 
+struct file_descriptor {
+	unsigned long vnum;
+	unsigned long cursor;
+	struct vm_file *vmfile;
+};
+
 /* Stores all task information that can be kept in userspace. */
 struct tcb {
 	/* Task list */
@@ -53,6 +61,9 @@ struct tcb {
 	/* Per-task swap file for now */
 	struct vm_file *swap_file;
 
+	/* File descriptors for this task */
+	struct file_descriptor fd[TASK_OFILES_MAX];
+
 	/* Pool to generate swap file offsets for fileless anonymous regions */
 	struct id_pool *swap_file_offset_pool;
 };
diff --git a/tasks/mm0/include/vm_area.h b/tasks/mm0/include/vm_area.h
index 0a648e5..99a68ad 100644
--- a/tasks/mm0/include/vm_area.h
+++ b/tasks/mm0/include/vm_area.h
@@ -59,8 +59,8 @@ struct fault_data {
 };
 
 struct vm_pager_ops {
-	void (*read_page)(struct fault_data *f, void *);
-	void (*write_page)(struct fault_data *f, void *);
+	int (*read_page)(struct vm_file *f, unsigned long f_offset, void *pagebuf);
+	int (*write_page)(struct vm_file *f, unsigned long f_offset, void *pagebuf);
 };
 
 /* Describes the pager task that handles a vm_area. */
@@ -68,27 +68,22 @@ struct vm_pager {
 	struct vm_pager_ops ops;	/* The ops the pager does on area */
 };
 
-/*
- * TODO: Since there's no vfs yet, an inode's i_addr field is the
- * virtual memory address of a file which uniquely identifies that file.
- */
-struct inode {
-	unsigned long i_addr;	/* The unique, global resource id. */
-};
-
 /*
  * Describes the in-memory representation of a file. This could
  * point at a file or another resource, e.g. a device area or swapper space.
  */
 struct vm_file {
-	struct inode inode;
+	int refcnt;
+	unsigned long vnum;		/* Vnode number */
 	unsigned long length;
 	struct list_head list;		/* List of all vm files in memory */
+	/* This is the cache of physical pages that this file has in memory. */
 	struct list_head page_cache_list;
 	struct vm_pager *pager;
 };
+
 /*
  * Describes a virtually contiguous chunk of memory region in a task. It covers
  * a unique virtual address area within its task, meaning that it does not
@@ -119,6 +114,7 @@ static inline struct vm_area *find_vma(unsigned long addr,
 
 /* Pagers */
 extern struct vm_pager default_file_pager;
+extern struct vm_pager boot_file_pager;
 extern struct vm_pager swap_pager;
diff --git a/tasks/mm0/main.c b/tasks/mm0/main.c
index bc9ef1c..fdb479a 100644
--- a/tasks/mm0/main.c
+++ b/tasks/mm0/main.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 
 /* FIXME:LOCKING:FIXME:LOCKING:FIXME:LOCKING
  * NOTE: For multithreadded MM0, not suprisingly, we need locking on
@@ -83,6 +84,20 @@ void handle_requests(void)
 		sys_shmdt(sender, (void *)mr[0]);
 		break;
 
+	case L4_IPC_TAG_PAGER_SYSOPEN:
+		/* vfs opens a file and tells us about it here. */
+		vfs_receive_sys_open(sender, (l4id_t)mr[0], (int)mr[1],
+				     (unsigned long)mr[2], (unsigned long)mr[3]);
+		break;
+
+	case L4_IPC_TAG_READ:
+		sys_read(sender, (int)mr[0], (void *)mr[1], (int)mr[2]);
+		break;
+
+	case L4_IPC_TAG_WRITE:
+		sys_write(sender, (int)mr[0], (void *)mr[1], (int)mr[2]);
+		break;
+
 	case L4_IPC_TAG_MMAP: {
 		struct sys_mmap_args *args = (struct sys_mmap_args *)&mr[0];
 		BUG(); /* FIXME: There are 8 arguments to ipc whereas there are 7 mrs available. Fix this by increasing MRs to 8 ??? */
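Each task now carries its own descriptor table (struct file_descriptor
fd[TASK_OFILES_MAX] in struct tcb), and the new IPC cases above hand the raw
descriptor straight to sys_read()/sys_write(). Below is a minimal sketch of
the descriptor-to-vmfile translation such a handler has to perform; the helper
name fd_to_vmfile() is hypothetical, and sys_read() in file.c further down
open-codes the same checks.

/* Hypothetical helper: translate (task, fd) into the cached vm_file. */
static struct vm_file *fd_to_vmfile(struct tcb *t, int fd)
{
	if (fd < 0 || fd >= TASK_OFILES_MAX)
		return 0;	/* Out-of-range descriptor */

	/* Never announced by the vfs via vfs_receive_sys_open() */
	if (!t->fd[fd].vmfile)
		return 0;

	return t->fd[fd].vmfile;
}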
diff --git a/tasks/mm0/src/devzero.c b/tasks/mm0/src/devzero.c
index c003d37..fba0514 100644
--- a/tasks/mm0/src/devzero.c
+++ b/tasks/mm0/src/devzero.c
@@ -43,7 +43,7 @@ void init_devzero(void)
 	INIT_LIST_HEAD(&devzero.list);
 	INIT_LIST_HEAD(&devzero.page_cache_list);
 	devzero.length = (unsigned int)-1;
-	devzero.inode.i_addr = -1;
+	devzero.vnum = -1;
 }
 
 struct vm_file *get_devzero(void)
diff --git a/tasks/mm0/src/fault.c b/tasks/mm0/src/fault.c
index b8e2ad1..da283e5 100644
--- a/tasks/mm0/src/fault.c
+++ b/tasks/mm0/src/fault.c
@@ -16,13 +16,25 @@
 #include
 #include
 #include
+#include
 
-/* FIXME: TODO:
+/* FIXME: FIXME: FIXME: FIXME: FIXME: FIXME: FIXME: FIXME: TODO:
  * For every page that is allocated, (read-only file pages) and anon pages
  * etc. Page cache for that page's file must be visited first, before
  * allocation.
  */
+unsigned long fault_to_file_offset(struct fault_data *fault)
+{
+	/* Fault's offset in its vma */
+	unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;
+
+	/* Fault's offset in the file */
+	unsigned long f_off_pfn = fault->vma->f_offset + vma_off_pfn;
+
+	return f_off_pfn;
+}
+
 /*
  * For copy-on-write vmas, grows an existing shadow vma, or creates a new one
  * for the copy-on-write'ed page. Then adds this shadow vma to the actual vma's
@@ -87,6 +99,7 @@ int do_file_page(struct fault_data *fault)
 		void *paddr = alloc_page(1);
 		void *vaddr = phys_to_virt(paddr);
 		struct page *page = phys_to_page(paddr);
+		unsigned long f_offset = fault_to_file_offset(fault);
 
 		/* Map new page at a self virtual address temporarily */
 		l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
@@ -95,7 +108,8 @@
 		 * Read the page. (Simply read into the faulty area that's
 		 * now mapped using a newly allocated page.)
 		 */
-		fault->vma->owner->pager->ops.read_page(fault, vaddr);
+		fault->vma->owner->pager->ops.read_page(fault->vma->owner,
+							f_offset, vaddr);
 
 		/* Remove temporary mapping */
 		l4_unmap(vaddr, 1, self_tid());
@@ -191,6 +205,7 @@ int do_file_page(struct fault_data *fault)
 		void *paddr = alloc_page(1);
 		void *vaddr = phys_to_virt(paddr);
 		struct page *page = phys_to_page(paddr);
+		unsigned long f_offset = fault_to_file_offset(fault);
 
 		/* Map it to self */
 		l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
@@ -206,7 +221,8 @@
 		 * Read the page. (Simply read into the faulty area that's
 		 * now mapped using a newly allocated page.)
 		 */
-		fault->vma->owner->pager->ops.read_page(fault, vaddr);
+		fault->vma->owner->pager->ops.read_page(fault->vma->owner,
+							f_offset, vaddr);
 
 		/* Unmap from self */
 		l4_unmap(vaddr, 1, self_tid());
@@ -403,17 +419,43 @@ int do_page_fault(struct fault_data *fault)
 	return 0;
 }
 
-void vm_file_pager_read_page(struct fault_data *fault, void *dest_page)
+int file_pager_read_page(struct vm_file *f, unsigned long f_offset, void *dest_page)
 {
-	/* Fault's offset in its vma */
-	unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;
+	int err;
 
-	/* Fault's offset in the file */
-	unsigned long f_off_pfn = fault->vma->f_offset + vma_off_pfn;
+	/* Map the page to vfs task (shared mapping) */
+	l4_map(virt_to_phys(dest_page), dest_page, 1, MAP_USR_RW_FLAGS, VFS_TID);
 
+	/* vfs reads into the page. */
+	err = vfs_read(f->vnum, f_offset, 1, dest_page);
+
+	/* Unmap it from vfs */
+	l4_unmap(dest_page, 1, VFS_TID);
+
+	return err;
+}
+
+int file_pager_write_page(struct vm_file *f, unsigned long f_offset, void *src_page)
+{
+	int err;
+
+	/* Map the page to vfs task (shared mapping) */
+	l4_map(virt_to_phys(src_page), src_page, 1, MAP_USR_RW_FLAGS, VFS_TID);
+
+	/* Write the page via vfs. */
+	err = vfs_write(f->vnum, f_offset, 1, src_page);
+
+	/* Unmap it from vfs */
+	l4_unmap(src_page, 1, VFS_TID);
+
+	return err;
+}
+
+int boot_pager_read_page(struct vm_file *f, unsigned long f_off_pfn,
+			 void *dest_page)
+{
 	/* The address of page in the file */
-	void *file_page = (void *)(fault->vma->owner->inode.i_addr +
-				   __pfn_to_addr(f_off_pfn));
+	void *file_page = (void *)(f->vnum + __pfn_to_addr(f_off_pfn));
 
 	/*
 	 * Map the memfile's page into virtual memory.
@@ -425,37 +467,34 @@ void vm_file_pager_read_page(struct fault_data *fault, void *dest_page)
 
 	/* Copy it into destination page */
 	memcpy(dest_page, file_page, PAGE_SIZE);
+
+	return 0;
 }
 
-void vm_file_pager_write_page(struct fault_data *f, void *p)
-{
-
-}
-
-void vm_swapper_read_page(struct fault_data *fault, void *p)
-{
-
-}
-
-void vm_swapper_write_page(struct fault_data *f, void *p) { }
 
+/* Pager for boot files read from sys_kdata() */
+struct vm_pager boot_file_pager = {
+	.ops = {
+		.read_page = boot_pager_read_page,
+		.write_page = 0,
+	},
+};
 
 /* Pager for file pages */
 struct vm_pager default_file_pager = {
 	.ops = {
-		.read_page = vm_file_pager_read_page,
-		.write_page= vm_file_pager_write_page,
+		.read_page = file_pager_read_page,
+		.write_page = 0,
 	},
 };
 
 /* Swap pager for anonymous and private pages */
 struct vm_pager swap_pager = {
 	.ops = {
-		.read_page = vm_swapper_read_page,
-		.write_page= vm_swapper_write_page,
+		.read_page = 0,
+		.write_page = 0,
 	},
 };
 
-
 void page_fault_handler(l4id_t sender, fault_kdata_t *fkdata)
 {
 	struct fault_data fault = {
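file_pager_read_page() and file_pager_write_page() above map the page into the
vfs task and then call vfs_read()/vfs_write(), which the new file.c below still
stubs out to return 0. A rough sketch of the IPC marshalling such a stub will
presumably need follows; the tag L4_IPC_TAG_VFS_READ and the request/reply call
l4_sendrecv() are hypothetical names, not part of this patch.

/*
 * Sketch only: a possible future body for mm0's vfs_read().
 * Assumes a hypothetical L4_IPC_TAG_VFS_READ tag and a hypothetical
 * send-and-wait primitive l4_sendrecv(); the page at pagebuf is
 * already mapped into the vfs task by file_pager_read_page().
 */
int vfs_read(unsigned long vnum, unsigned long f_offset,
	     unsigned long npages, void *pagebuf)
{
	write_mr(L4SYS_ARG0, vnum);
	write_mr(L4SYS_ARG1, f_offset);
	write_mr(L4SYS_ARG2, npages);
	write_mr(L4SYS_ARG3, (unsigned long)pagebuf);

	/* Ask the vfs to fill the shared page and wait for its reply. */
	return l4_sendrecv(VFS_TID, L4_IPC_TAG_VFS_READ);
}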
diff --git a/tasks/mm0/src/file.c b/tasks/mm0/src/file.c
new file mode 100644
index 0000000..07a4f8e
--- /dev/null
+++ b/tasks/mm0/src/file.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2008 Bahadir Balban
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Global list of in-memory vm files. */
+struct list_head vm_file_list;
+
+/* Allocate and initialise a vmfile, and return it */
+struct vm_file *vmfile_alloc_init(void)
+{
+	struct vm_file *file;
+
+	if (!(file = kzalloc(sizeof(*file))))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&file->list);
+	INIT_LIST_HEAD(&file->page_cache_list);
+
+	return file;
+}
+
+void vmfile_init(void)
+{
+	INIT_LIST_HEAD(&vm_file_list);
+}
+
+int vfs_read(unsigned long vnum, unsigned long f_offset, unsigned long npages,
+	     void *pagebuf)
+{
+	return 0;
+}
+
+int vfs_write(unsigned long vnum, unsigned long f_offset, unsigned long npages,
+	      void *pagebuf)
+{
+	return 0;
+}
+
+/*
+ * When a new file is opened by the vfs, this receives the information
+ * about that file so that it can later serve that file's content
+ * (via read/write/mmap) to that task.
+ */
+int vfs_receive_sys_open(l4id_t sender, l4id_t opener, int fd,
+			 unsigned long vnum, unsigned long length)
+{
+	struct vm_file *vmfile;
+	struct tcb *t;
+
+	/* Check argument validity */
+	if (sender != VFS_TID)
+		return -EPERM;
+
+	if (!(t = find_task(opener)))
+		return -EINVAL;
+
+	if (fd < 0 || fd >= TASK_OFILES_MAX)
+		return -EINVAL;
+
+	/* Assign vnum to given fd on the task */
+	t->fd[fd].vnum = vnum;
+	t->fd[fd].cursor = 0;
+
+	/* Check if that vm_file is already in the list */
+	list_for_each_entry(vmfile, &vm_file_list, list) {
+		if (vmfile->vnum == vnum) {
+			/* Add a reference to it from the task */
+			t->fd[fd].vmfile = vmfile;
+			vmfile->refcnt++;
+			return 0;
+		}
+	}
+
+	/* Otherwise allocate a new one for this vnode */
+	if (IS_ERR(vmfile = vmfile_alloc_init()))
+		return (int)vmfile;
+
+	/* Initialise and add it to global list */
+	vmfile->vnum = vnum;
+	vmfile->length = length;
+	vmfile->pager = &default_file_pager;
+	list_add(&vmfile->list, &vm_file_list);
+
+	/* Add a reference to it from the task */
+	t->fd[fd].vmfile = vmfile;
+	vmfile->refcnt++;
+
+	return 0;
+}
+
+/* TODO: Implement this */
+struct page *find_page(struct vm_file *f, unsigned long pfn)
+{
+	struct page *p;
+
+	list_for_each_entry(p, &f->page_cache_list, list) {
+		if (p->f_offset == pfn)
+			return p;
+	}
+
+	return 0;
+}
+
+/*
+ * This reads in a range of pages from a file just like a page fault,
+ * but it's not in the page fault path.
+ */
+int read_file_pages(struct vm_file *vmfile, unsigned long pfn_start,
+		    unsigned long pfn_end)
+{
+	struct page *page;
+
+	for (int f_offset = pfn_start; f_offset < pfn_end; f_offset++) {
+		/* The page is not resident in page cache. */
+		if (!(page = find_page(vmfile, f_offset))) {
+			/* Allocate a new page */
+			void *paddr = alloc_page(1);
+			void *vaddr = phys_to_virt(paddr);
+			page = phys_to_page(paddr);
+
+			/* Map new page at a self virtual address temporarily */
+			l4_map(paddr, vaddr, 1, MAP_USR_RW_FLAGS, self_tid());
+
+			/* Read-in the page using the file's pager */
+			vmfile->pager->ops.read_page(vmfile, f_offset, vaddr);
+
+			spin_lock(&page->lock);
+			page->count++;
+			page->owner = vmfile;
+			page->f_offset = f_offset;
+
+			/*
+			 * TODO:
+			 * Page is not mapped into any address space except mm0.
+			 * Shall we add mm0 vaddr here ???
+			 */
+			page->virtual = 0;
+
+			/* Add the page to owner's list of in-memory pages */
+			BUG_ON(!list_empty(&page->list));
+			list_add(&page->list, &vmfile->page_cache_list);
+			spin_unlock(&page->lock);
+		}
+	}
+
+	return 0;
+}
+
+int sys_read(l4id_t sender, int fd, void *buf, int count)
+{
+	unsigned long foff_pfn_start, foff_pfn_end;
+	struct vm_file *vmfile;
+	struct tcb *t;
+	unsigned long cursor;
+	int err;
+
+	BUG_ON(!(t = find_task(sender)));
+
+	/* TODO: Check user buffer and count validity */
+	if (fd < 0 || fd >= TASK_OFILES_MAX)
+		return -EINVAL;
+
+	vmfile = t->fd[fd].vmfile;
+	cursor = t->fd[fd].cursor;
+
+	foff_pfn_start = __pfn(cursor);
+	foff_pfn_end = __pfn(page_align_up(cursor + count));
+
+	if ((err = read_file_pages(vmfile, foff_pfn_start, foff_pfn_end)) < 0)
+		return err;
+
+	/*
+	 * FIXME: If vmfiles were mapped contiguously in mm0, then these reads
+	 * could be implemented as a straightforward copy, as below.
+	 * The problem is that in-memory file pages are usually non-contiguous.
+	 *
+	 * memcpy(buf, (void *)(vmfile->base + cursor), count);
+	 */
+
+	return 0;
+}
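Once read_file_pages() has made the [cursor, cursor + count) range resident,
the copy-out that sys_read() leaves as a FIXME still has to walk the cache
page by page, because the cached pages are not physically contiguous. Below is
a sketch of that loop, assuming page_to_phys() exists as the inverse of
phys_to_page() and that buf is a buffer mm0 can write to directly (both
assumptions, not part of this patch).

/* Sketch: copy count bytes starting at cursor from vmfile's page
 * cache into buf, one page at a time. */
static int copy_cache_to_buf(struct vm_file *vmfile, unsigned long cursor,
			     void *buf, int count)
{
	int copied = 0;

	while (copied < count) {
		unsigned long f_off = cursor + copied;
		unsigned long in_page = f_off & (PAGE_SIZE - 1);
		int nbytes = PAGE_SIZE - in_page;
		struct page *page = find_page(vmfile, __pfn(f_off));

		if (nbytes > count - copied)
			nbytes = count - copied;

		/* read_file_pages() should have brought this page in. */
		if (!page)
			return -EIO;

		memcpy((char *)buf + copied,
		       (char *)phys_to_virt(page_to_phys(page)) + in_page,
		       nbytes);
		copied += nbytes;
	}

	return copied;
}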
+
+int sys_write(l4id_t sender, int fd, void *buf, int count)
+{
+
+	return 0;
+}
+
+/* FIXME: Check for invalid cursor values */
+int sys_lseek(l4id_t sender, int fd, off_t offset, int whence)
+{
+	struct tcb *t;
+
+	BUG_ON(!(t = find_task(sender)));
+
+	if (offset < 0)
+		return -EINVAL;
+
+	switch (whence) {
+	case SEEK_SET:
+		t->fd[fd].cursor = offset;
+		break;
+	case SEEK_CUR:
+		t->fd[fd].cursor += offset;
+		break;
+	case SEEK_END:
+		t->fd[fd].cursor = t->fd[fd].vmfile->length + offset;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
diff --git a/tasks/mm0/src/init.c b/tasks/mm0/src/init.c
index 9ff7cb9..6b5f422 100644
--- a/tasks/mm0/src/init.c
+++ b/tasks/mm0/src/init.c
@@ -12,6 +12,8 @@
 #include
 #include
 #include
+#include
+#include
 
 void init_utcb(void)
 {
@@ -46,6 +48,8 @@ void init_mm(struct initdata *initdata)
 	shm_init();
 	printf("%s: Initialised shm structures.\n", __TASKNAME__);
 
+	vmfile_init();
+
 	/* Give the kernel some memory to use for its allocators */
 	l4_kmem_grant(__pfn(alloc_page(__pfn(SZ_1MB))), __pfn(SZ_1MB));
 }
diff --git a/tasks/mm0/src/lib/bit.c b/tasks/mm0/src/lib/bit.c
index 7190e5b..af089a0 100644
--- a/tasks/mm0/src/lib/bit.c
+++ b/tasks/mm0/src/lib/bit.c
@@ -88,6 +88,18 @@ int check_and_clear_bit(u32 *word, int bit)
 	}
 }
 
+int check_and_set_bit(u32 *word, int bit)
+{
+	/* Check that bit was clear */
+	if (!(word[BITWISE_GETWORD(bit)] & BITWISE_GETBIT(bit))) {
+		word[BITWISE_GETWORD(bit)] |= BITWISE_GETBIT(bit);
+		return 0;
+	} else {
+		//printf("Trying to set already set bit\n");
+		return -1;
+	}
+}
+
 int check_and_clear_contig_bits(u32 *word, int first, int nbits)
 {
 	for (int i = first; i < first + nbits; i++)
diff --git a/tasks/mm0/src/lib/idpool.c b/tasks/mm0/src/lib/idpool.c
index 136ad56..f10d4e4 100644
--- a/tasks/mm0/src/lib/idpool.c
+++ b/tasks/mm0/src/lib/idpool.c
@@ -61,3 +61,18 @@ int id_del(struct id_pool *pool, int id)
 
 	return ret;
 }
+/* Return a specific id, if available */
+int id_get(struct id_pool *pool, int id)
+{
+	int ret;
+
+	spin_lock(&pool->lock);
+	ret = check_and_set_bit(pool->bitmap, id);
+	spin_unlock(&pool->lock);
+
+	if (ret < 0)
+		return ret;
+	else
+		return id;
+}
+
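id_get() complements id_new(): rather than taking the next free id, it
reserves a specific one and fails if that id is already in use. A small usage
sketch follows; the pool size passed to id_pool_new_init() is arbitrary here
and the printf is only illustrative.

void id_get_example(void)
{
	struct id_pool *pool = id_pool_new_init(64);

	/* Reserve id 5; id_get() returns the id itself on success. */
	if (id_get(pool, 5) < 0)
		printf("id 5 already in use\n");

	/* A second reservation of the same id must now fail. */
	BUG_ON(id_get(pool, 5) >= 0);

	/* Release it again so it can be reserved once more. */
	id_del(pool, 5);
}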
diff --git a/tasks/mm0/src/task.c b/tasks/mm0/src/task.c
index 097bb01..2cdee93 100644
--- a/tasks/mm0/src/task.c
+++ b/tasks/mm0/src/task.c
@@ -20,7 +20,7 @@
 #include
 #include
 #include
-
+#include
 
 struct tcb_head {
 	struct list_head list;
@@ -73,18 +73,6 @@ struct tcb *create_init_tcb(struct tcb_head *tcbs)
 	return task;
 }
 
-/* Create temporary run-time files in memory to test with mmap */
-struct vm_file *create_init_vmfile(struct list_head *vmfile_head)
-{
-	struct vm_file *file = kzalloc(sizeof(*file));
-
-	INIT_LIST_HEAD(&file->list);
-	INIT_LIST_HEAD(&file->page_cache_list);
-	list_add(&file->list, vmfile_head);
-
-	return file;
-}
-
 int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs)
 {
 	int err;
@@ -119,16 +107,17 @@ int start_boot_tasks(struct initdata *initdata, struct tcb_head *tcbs)
 		pc = USER_AREA_START;
 
 		/* Create vm file and tcb */
-		file = create_init_vmfile(&initdata->boot_file_list);
+		file = vmfile_alloc_init();
 		task = create_init_tcb(tcbs);
 
 		/*
 		 * For boot files, we use the physical address of the memory
		 * file as its mock-up inode.
 		 */
-		file->inode.i_addr = img->phys_start;
+		file->vnum = img->phys_start;
 		file->length = img->phys_end - img->phys_start;
-		file->pager = &default_file_pager;
+		file->pager = &boot_file_pager;
+		list_add(&file->list, &initdata->boot_file_list);
 
 		/* mmap each task's physical image to task's address space. */
 		if ((err = do_mmap(file, 0, task, USER_AREA_START,