Redesigned sys_open call

fs0 used to receive open() requests and notify the pager about them via a syscall IPC.
This caused deadlocks, because on all other calls the request flow is mm0 -> fs0.
The solution is to have mm0 ask fs0 to validate a file descriptor on the first
request that involves that descriptor. This way validation of the fd is delayed
until its first use, which avoids the deadlock. It also fits well with the lazy
request handling design philosophy.
This commit is contained in:
Bahadir Balban
2008-09-16 15:50:00 +03:00
parent 398bc0c914
commit 510852b8b8
7 changed files with 189 additions and 138 deletions

View File

@@ -13,6 +13,8 @@ int sys_mkdir(l4id_t sender, const char *pathname, unsigned int mode);
int sys_chdir(l4id_t sender, const char *pathname);
/* Calls from pager that completes a posix call */
int pager_sys_open(l4id_t sender, l4id_t opener, int fd);
int pager_sys_read(l4id_t sender, unsigned long vnum, unsigned long f_offset,
unsigned long npages, void *pagebuf);

View File

@@ -88,6 +88,9 @@ void handle_fs_requests(void)
pager_sys_read(sender, (unsigned long)mr[0], (unsigned long)mr[1],
(unsigned long)mr[2], (void *)mr[3]);
break;
case L4_IPC_TAG_PAGER_OPEN:
pager_sys_open(sender, (l4id_t)mr[0], (int)mr[1]);
break;
case L4_IPC_TAG_PAGER_WRITE:
pager_sys_write(sender, (unsigned long)mr[0], (unsigned long)mr[1],
(unsigned long)mr[2], (void *)mr[3]);

View File

@@ -18,39 +18,50 @@
#include <alloca.h>
#include <path.h>
#define NILFD -1
/*
* This notifies mm0 that this is the fd that refers to this vnode number
* from now on. Note this is a one-way call.
* This informs mm0 about an opened file descriptor.
*
* MM0 *also* keeps track of fd's because mm0 is a better candidate
* for handling syscalls that access file content (i.e. read/write) since
* it maintains the page cache.
* it maintains the page cache. MM0 is not notified about opened files
* but is rather informed when it asks to be. This avoids deadlocks by
* keeping the request flow in one way.
*/
/*
 * Replies to a query about file descriptor `fd` of task `opener`.
 *
 * On success the vnode number and size of the file behind the descriptor
 * are written to message registers ARG0/ARG1 and the ipc is returned
 * with 0. On failure the ipc is returned with:
 *   -ESRCH  no task with id `opener` is known here,
 *   -EBADF  the descriptor is negative or not open in that task,
 *   or the error encoded in the pointer returned by the vnode lookup.
 *
 * The function itself always returns 0; the result travels in the reply.
 */
int pager_sys_open(l4id_t sender, l4id_t opener, int fd)
{
	struct tcb *task;
	struct vnode *v;

	printf("%s/%s\n", __TASKNAME__, __FUNCTION__);

	/* Check if such task exists */
	if (!(task = find_task(opener))) {
		l4_ipc_return(-ESRCH);
		return 0;
	}

	/*
	 * fd is taken straight from a message register by the request
	 * dispatcher, so never use a negative value as an index.
	 * NOTE(review): the size of task->fd[] is not visible from here;
	 * an upper-bound check against it should be added as well.
	 */
	if (fd < 0 || task->fd[fd] == NILFD) {
		l4_ipc_return(-EBADF);
		return 0;
	}

	/* The fd slot holds the vnode number; find the vnode by it */
	v = vfs_lookup_byvnum(vfs_root.pivot->sb, task->fd[fd]);
	if (IS_ERR(v)) {
		l4_ipc_return((int)v);
		return 0;
	}

	/* Write file information */
	write_mr(L4SYS_ARG0, v->vnum);
	write_mr(L4SYS_ARG1, v->size);

	/* Return ipc with success code */
	l4_ipc_return(0);
	return 0;
}
@@ -99,7 +110,7 @@ struct vnode *vfs_create(struct tcb *task, struct pathdata *pdata,
* Pager notifies vfs about a closed file descriptor.
*
* FIXME: fsync + close could be done under a single "close" ipc
* from pager. Currently there are 2 ipcs: 1 fsync + 1 fd close.
* from pager. Currently there are 2 ipcs: 1 fsync + 1 fd close.
*/
int pager_sys_close(l4id_t sender, l4id_t closer, int fd)
{
@@ -116,7 +127,7 @@ int pager_sys_close(l4id_t sender, l4id_t closer, int fd)
l4_ipc_return(err);
return 0;
}
task->fd[fd] = -1;
task->fd[fd] = NILFD;
l4_ipc_return(0);
return 0;
@@ -133,7 +144,7 @@ int sys_open(l4id_t sender, const char *pathname, int flags, unsigned int mode)
struct vnode *v;
struct tcb *task;
int fd;
int retval, err;
int retval;
// printf("%s/%s\n", __TASKNAME__, __FUNCTION__);
@@ -171,12 +182,6 @@ int sys_open(l4id_t sender, const char *pathname, int flags, unsigned int mode)
/* Assign the new fd with the vnode's number */
task->fd[fd] = v->vnum;
/* Tell the pager about opened vnode information */
if ((err = pager_sys_open(sender, fd, v->vnum, v->size)) < 0) {
retval = err;
goto out;
}
out:
pathdata_destroy(pdata);
l4_ipc_return(retval);

View File

@@ -4,6 +4,7 @@
#include <l4/lib/list.h>
#include <l4lib/types.h>
#include <posix/sys/types.h>
#include <task.h>
void vmfile_init(void);
@@ -19,6 +20,7 @@ int sys_write(l4id_t sender, int fd, void *buf, int count);
int sys_lseek(l4id_t sender, int fd, off_t offset, int whence);
int sys_close(l4id_t sender, int fd);
int sys_fsync(l4id_t sender, int fd);
int file_open(struct tcb *opener, int fd);
struct vfs_file_data {
unsigned long vnum;

View File

@@ -80,13 +80,6 @@ void handle_requests(void)
sys_shmdt(sender, (void *)mr[0]);
break;
case L4_IPC_TAG_PAGER_OPEN:
/* vfs opens a file and tells us about it here. */
vfs_receive_sys_open(sender, (l4id_t)mr[0], (int)mr[1],
(unsigned long)mr[2],
(unsigned long)mr[3]);
break;
case L4_IPC_TAG_UTCB:
task_send_utcb_address(sender, (l4id_t)mr[0]);
break;

View File

@@ -82,71 +82,106 @@ int vfs_read(unsigned long vnum, unsigned long file_offset,
/*
* When a new file is opened by the vfs this receives the information
* about that file so that it can serve that file's content (via
* read/write/mmap) later to that task.
* When a task does a read/write/mmap request on a file, if
* the file descriptor is unknown to the pager, this call
* asks vfs if that file has been opened, and any other
* relevant information.
*/
/*
 * Asks vfs about file descriptor `fd` of task `opener`.
 *
 * Sends an L4_IPC_TAG_PAGER_OPEN request to VFS_TID carrying the opener
 * id and the fd. On success stores the vnode number in *vnum and the
 * file length in *length, as read from the reply's ARG0/ARG1 registers.
 *
 * Returns the negative ipc error or the negative value reported by vfs
 * on failure; *vnum and *length are left untouched in that case.
 */
int vfs_open(l4id_t opener, int fd, unsigned long *vnum, unsigned long *length)
{
	int err;

	printf("%s/%s\n", __TASKNAME__, __FUNCTION__);

	l4_save_ipcregs();

	write_mr(L4SYS_ARG0, opener);
	write_mr(L4SYS_ARG1, fd);

	if ((err = l4_sendrecv(VFS_TID, VFS_TID, L4_IPC_TAG_PAGER_OPEN)) < 0) {
		printf("%s: L4 IPC Error: %d.\n", __FUNCTION__, err);
		goto out;
	}

	/* Check if syscall was successful */
	if ((err = l4_get_retval()) < 0) {
		printf("%s: VFS open error: %d.\n",
		       __FUNCTION__, err);
		goto out;
	}

	/* Read file information */
	*vnum = read_mr(L4SYS_ARG0);
	*length = read_mr(L4SYS_ARG1);

out:
	/*
	 * Single exit: the registers saved above are restored on the
	 * error paths too, not only after a successful exchange.
	 */
	l4_restore_ipcregs();
	return err;
}
/* Initialise a new file and the descriptor for it from given file data */
/*
 * Binds descriptor `fd` of `task` to the vm_file that represents vnode
 * `vnum`, creating that vm_file with the given `length` if none of the
 * files on vm_file_list matches.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * vfs_file_create(). The fd slot is only written once a vm_file is in
 * hand, so a failed open leaves the slot empty and the open can be
 * retried without tripping the empty-slot assertions below.
 *
 * The caller is expected to have range-checked fd.
 */
int do_open(struct tcb *task, int fd, unsigned long vnum, unsigned long length)
{
	struct vm_file *vmfile;

	/* fd slot must be empty */
	BUG_ON(task->files->fd[fd].vnum != 0);
	BUG_ON(task->files->fd[fd].cursor != 0);

	/* Reuse the vm_file if this vnode is already on the list */
	list_for_each_entry(vmfile, &vm_file_list, list) {
		/* Check whether it is a vfs file and if so vnums match. */
		if ((vmfile->type & VM_FILE_VFS) &&
		    vm_file_to_vnum(vmfile) == vnum)
			goto attach;
	}

	/* Otherwise allocate a new one for this vnode */
	if (IS_ERR(vmfile = vfs_file_create()))
		return (int)vmfile;

	/* Initialise the new file */
	vm_file_to_vnum(vmfile) = vnum;
	vmfile->length = length;
	vmfile->vm_obj.pager = &file_pager;

	/*
	 * Add to global list.
	 * NOTE(review): insertion links vm_obj.list while the lookup above
	 * walks the `list` member - confirm both refer to the same link.
	 */
	list_add(&vmfile->vm_obj.list, &vm_file_list);

attach:
	/* Assign vnum to given fd on the task and take a reference */
	task->files->fd[fd].vnum = vnum;
	task->files->fd[fd].cursor = 0;
	task->files->fd[fd].vmfile = vmfile;
	vmfile->openers++;

	return 0;
}
int file_open(struct tcb *opener, int fd)
{
int err;
unsigned long vnum;
unsigned long length;
if (fd < 0 || fd > TASK_FILES_MAX)
return -EINVAL;
/* Ask vfs if such a file has been recently opened */
if ((err = vfs_open(opener->tid, fd, &vnum, &length)) < 0)
return err;
/* Initialise local structures with received file data */
if ((err = do_open(opener, fd, vnum, length)) < 0)
return err;
return 0;
}
/*
* Inserts the page to vmfile's list in order of page frame offset.
@@ -330,21 +365,18 @@ int flush_file_pages(struct vm_file *f)
}
/* Given a task and fd, syncs all IO on it */
int fsync_common(l4id_t sender, int fd)
int fsync_common(struct tcb *task, int fd)
{
struct vm_file *f;
struct tcb *task;
int err;
/* Get the task */
BUG_ON(!(task = find_task(sender)));
/* Check fd validity */
if (fd < 0 || fd > TASK_FILES_MAX || !task->files->fd[fd].vmfile)
return -EBADF;
if (!task->files->fd[fd].vmfile)
if ((err = file_open(task, fd)) < 0)
return err;
/* Finish I/O on file */
f = task->files->fd[fd].vmfile;
BUG_ON(!(f = task->files->fd[fd].vmfile));
if ((err = flush_file_pages(f)) < 0)
return err;
@@ -352,14 +384,10 @@ int fsync_common(l4id_t sender, int fd)
}
/* Closes the file descriptor and notifies vfs */
int fd_close(l4id_t sender, int fd)
int do_close(struct tcb *task, int fd)
{
struct tcb *task;
int err;
/* Get the task */
BUG_ON(!(task = find_task(sender)));
// printf("%s: Closing fd: %d on task %d\n", __FUNCTION__,
// fd, task->tid);
if ((err = vfs_close(task->tid, fd)) < 0)
@@ -378,15 +406,21 @@ int fd_close(l4id_t sender, int fd)
int sys_close(l4id_t sender, int fd)
{
int retval;
struct tcb *task;
if (!(task = find_task(sender))) {
l4_ipc_return(-ESRCH);
return 0;
}
/* Sync the file and update stats */
if ((retval = fsync_common(sender, fd)) < 0) {
if ((retval = fsync_common(task, fd)) < 0) {
l4_ipc_return(retval);
return 0;
}
/* Close the file descriptor. */
retval = fd_close(sender, fd);
retval = do_close(task, fd);
printf("%s: Closed fd %d. Returning %d\n",
__TASKNAME__, fd, retval);
l4_ipc_return(retval);
@@ -396,8 +430,20 @@ int sys_close(l4id_t sender, int fd)
/*
 * fsync() request entry point.
 *
 * Looks up the requesting task and syncs all I/O on its descriptor `fd`
 * through fsync_common(). The outcome (-ESRCH when the sender is not a
 * known task, otherwise whatever fsync_common() returned) is delivered
 * with l4_ipc_return(); the function itself always returns 0.
 */
int sys_fsync(l4id_t sender, int fd)
{
	struct tcb *task = find_task(sender);
	int ret;

	if (!task)
		ret = -ESRCH;
	else
		/* Sync the file and update stats */
		ret = fsync_common(task, fd);

	l4_ipc_return(ret);
	return 0;
}
/* FIXME: Add error handling to this */
@@ -561,40 +607,41 @@ int sys_read(l4id_t sender, int fd, void *buf, int count)
unsigned long pfn_start, pfn_end;
unsigned long cursor, byte_offset;
struct vm_file *vmfile;
int err, retval = 0;
struct tcb *task;
int ret = 0;
BUG_ON(!(task = find_task(sender)));
/* Check fd validity */
if (fd < 0 || fd > TASK_FILES_MAX || !task->files->fd[fd].vmfile) {
retval = -EBADF;
if (!(task = find_task(sender))) {
ret = -ESRCH;
goto out;
}
/* Check fd validity */
if (!task->files->fd[fd].vmfile)
if ((ret = file_open(task, fd)) < 0)
goto out;
/* Check count validity */
if (count < 0) {
retval = -EINVAL;
ret = -EINVAL;
goto out;
} else if (!count) {
retval = 0;
ret = 0;
goto out;
}
/* Check user buffer validity. */
if ((err = validate_task_range(task, (unsigned long)buf,
if ((ret = validate_task_range(task, (unsigned long)buf,
(unsigned long)(buf + count),
VM_READ)) < 0) {
retval = err;
VM_READ)) < 0)
goto out;
}
vmfile = task->files->fd[fd].vmfile;
cursor = task->files->fd[fd].cursor;
/* If cursor is beyond file end, simply return 0 */
if (cursor >= vmfile->length) {
retval = 0;
ret = 0;
goto out;
}
@@ -611,29 +658,26 @@ int sys_read(l4id_t sender, int fd, void *buf, int count)
count = vmfile->length - cursor;
/* Read the page range into the cache from file */
if ((err = read_file_pages(vmfile, pfn_start, pfn_end)) < 0) {
retval = err;
if ((ret = read_file_pages(vmfile, pfn_start, pfn_end)) < 0)
goto out;
}
/* The offset of cursor on first page */
byte_offset = PAGE_MASK & cursor;
/* Read it into the user buffer from the cache */
if ((count = read_cache_pages(vmfile, task, buf, pfn_start, pfn_end,
byte_offset, count)) < 0) {
retval = count;
byte_offset, count)) < 0) {
ret = count;
goto out;
}
/* Update cursor on success */
task->files->fd[fd].cursor += count;
retval = count;
ret = count;
out:
l4_ipc_return(retval);
l4_ipc_return(ret);
return 0;
}
/* FIXME:
@@ -649,33 +693,33 @@ int sys_write(l4id_t sender, int fd, void *buf, int count)
unsigned long pfn_nstart, pfn_nend; /* New pages start/end */
unsigned long cursor, byte_offset;
struct vm_file *vmfile;
int err = 0, retval = 0;
struct tcb *task;
int ret = 0;
BUG_ON(!(task = find_task(sender)));
/* Check fd validity */
if (fd < 0 || fd > TASK_FILES_MAX || !task->files->fd[fd].vmfile) {
retval = -EBADF;
if (!(task = find_task(sender))) {
ret = -ESRCH;
goto out;
}
/* Check fd validity */
if (!task->files->fd[fd].vmfile)
if ((ret = file_open(task, fd)) < 0)
goto out;
/* Check count validity */
if (count < 0) {
retval = -EINVAL;
ret = -EINVAL;
goto out;
} else if (!count) {
retval = 0;
ret = 0;
goto out;
}
/* Check user buffer validity. */
if ((err = validate_task_range(task, (unsigned long)buf,
if ((ret = validate_task_range(task, (unsigned long)buf,
(unsigned long)(buf + count),
VM_WRITE | VM_READ)) < 0) {
retval = err;
VM_WRITE | VM_READ)) < 0)
goto out;
}
vmfile = task->files->fd[fd].vmfile;
cursor = task->files->fd[fd].cursor;
@@ -724,27 +768,21 @@ int sys_write(l4id_t sender, int fd, void *buf, int count)
/*
* Read in the portion that's already part of the file.
*/
if ((err = read_file_pages(vmfile, pfn_fstart, pfn_fend)) < 0) {
retval = err;
if ((ret = read_file_pages(vmfile, pfn_fstart, pfn_fend)) < 0)
goto out;
}
/* Create new pages for the part that's new in the file */
if ((err = new_file_pages(vmfile, pfn_nstart, pfn_nend)) < 0) {
retval = err;
if ((ret = new_file_pages(vmfile, pfn_nstart, pfn_nend)) < 0)
goto out;
}
/*
* At this point be it new or existing file pages, all pages
* to be written are expected to be in the page cache. Write.
*/
byte_offset = PAGE_MASK & cursor;
if ((err = write_cache_pages(vmfile, task, buf, pfn_wstart,
pfn_wend, byte_offset, count)) < 0) {
retval = err;
if ((ret = write_cache_pages(vmfile, task, buf, pfn_wstart,
pfn_wend, byte_offset, count)) < 0)
goto out;
}
/*
* Update the file size, and cursor. vfs will be notified
@@ -755,10 +793,10 @@ int sys_write(l4id_t sender, int fd, void *buf, int count)
vmfile->length = task->files->fd[fd].cursor + count;
task->files->fd[fd].cursor += count;
retval = count;
ret = count;
out:
l4_ipc_return(retval);
l4_ipc_return(ret);
return 0;
}
@@ -769,14 +807,16 @@ int sys_lseek(l4id_t sender, int fd, off_t offset, int whence)
int retval = 0;
unsigned long long total, cursor;
BUG_ON(!(task = find_task(sender)));
/* Check fd validity */
if (fd < 0 || fd > TASK_FILES_MAX || !task->files->fd[fd].vmfile) {
retval = -EBADF;
if (!(task = find_task(sender))) {
retval = -ESRCH;
goto out;
}
/* Check fd validity */
if (!task->files->fd[fd].vmfile)
if ((retval = file_open(task, fd)) < 0)
goto out;
/* Offset validity */
if (offset < 0) {
retval = -EINVAL;

View File

@@ -7,11 +7,12 @@
#include <kmalloc/kmalloc.h>
#include INC_API(errno.h)
#include <posix/sys/types.h>
#include <task.h>
#include <mmap.h>
#include <memory.h>
#include <l4lib/arch/syscalls.h>
#include <l4lib/arch/syslib.h>
#include <task.h>
#include <mmap.h>
#include <file.h>
#include <memory.h>
#if 0
/* TODO: This is to be implemented when fs0 is ready. */
@@ -589,11 +590,16 @@ int sys_mmap(l4id_t sender, void *start, size_t length, int prot,
struct vm_file *file = 0;
unsigned int vmflags = 0;
struct tcb *task;
int err;
BUG_ON(!(task = find_task(sender)));
if (!(task = find_task(sender)))
return -ESRCH;
if ((fd < 0 && !(flags & MAP_ANONYMOUS)) || fd > TASK_FILES_MAX)
return -EINVAL;
/* Check fd validity */
if (!(flags & MAP_ANONYMOUS))
if (!task->files->fd[fd].vmfile)
if ((err = file_open(task, fd)) < 0)
return err;
if (base < task->start || base >= task->end)
return -EINVAL;