From 69db3a04c0a418c6a3c4c2493da54cbf81505b16 Mon Sep 17 00:00:00 2001
From: Bahadir Balban <bahadir@bahadir-laptop.(none)>
Date: Tue, 19 Aug 2008 18:03:23 +0300
Subject: [PATCH] Towards implementing fork.

Issue is that shadow object references from original objects are into
the links rather than the objects.
---
 README                      | 29 +++++------
 include/l4/api/thread.h     | 21 ++++++--
 include/l4/arch/arm/v5/mm.h |  1 +
 src/api/thread.c            | 79 +++++++++++++++---------------
 src/arch/arm/v5/mm.c        | 36 ++++++++++++++
 tasks/mm0/include/task.h    |  1 +
 tasks/mm0/include/vm_area.h | 13 ++++-
 tasks/mm0/src/clone.c       | 96 +++++++++++++++++++++++++++----------
 tasks/mm0/src/fault.c       | 63 +++++++++++++++++++++++-
 tasks/mm0/src/mmap.c        |  6 +--
 tasks/mm0/src/task.c        |  6 +--
 11 files changed, 258 insertions(+), 93 deletions(-)

diff --git a/README b/README
index ca9fed4..333c01a 100644
--- a/README
+++ b/README
@@ -92,10 +92,6 @@ There are many open source POSIX operating systems with advanced features such
 as BSD versions and Linux. However, neither of these were originally designed
 for embedded systems. Multiple problems arise due to this fact.
 
-These systems are well established. They target a broad range of platforms and
-uses, but consequently their user base has saturated, and embedded platforms
-don't get enough emphasis.
-
 Unix itself and all the tools built upon weren't meant for using on small
 devices. Accordingly, these operating systems contain a lot of historical code.
 Their code base is so big, that it gets more and more difficult to understand
@@ -110,7 +106,13 @@ address space. This is an important issue on embedded systems since their
 operation is more sensitive to disruptions. Being a microkernel design, Codezero
 aims to defeat this problem and increase dependability.
 
-Other than these modern kernels, there are existing operating systems targeting
+From a support perspective, most Unix operating systems like BSD and Linux have
+a highly saturated user base. The developers focus on these existing users and
+often the systems they support are servers and not embedded computers. Codezero
+will focus completely on embedded systems, aiming to meet the support needs of
+this type of system.
+
+Other than modern unix kernels, there are existing operating systems targeting
 embedded devices. Most of them are proprietary, with their own users. Some of
 them are structurally too simplistic, and lack modern features such as paging.
 There ones that are well established, but Codezero will contrast them by
@@ -123,14 +125,13 @@ Finally, POSIX compliance is only a step, or a partial aim for the Codezero
 microkernel. It is not limited to the goal of just complying with POSIX, which
 has been done many times by other operating systems. The idea is to implement
 a generic software environment where multiple system services can reside in
-the same run-time, but on the other hand, provide natively implemented resource
-management services to be used as the default solution. In other words, the
-project will provide the mechanism to accomodate multiple operating systems,
-and it will also supply its own set of system services with a POSIX-like API.
-By providing a variety of system-level software options, the applications
-will be able to choose among different speed, safety, determinism policies at
-the same run-time. This is expected to prove useful in embedded software
-problems.
+the same run-time, but on the other hand, natively implemented system services
+will be supplied as the default solution. In other words, the project will
+provide the mechanism to accommodate multiple operating systems, and it will also
+supply its own set of system services with a POSIX-like API. By providing a
+variety of system software options, the applications will be able to choose
+among different speed, safety, determinism policies at the same run-time. This
+is expected to prove useful in the embedded software domain.
 
 Furthermore there are new ideas in literature that would improve systems
 software but aren't implemented either because they have no existing users or
@@ -148,7 +149,7 @@ technology.
 Can you summarise all this? Why should I use Codezero, again?
 
 Codezero is an operating system that targets embedded systems. It supports the
-most fundamental posix calls and it implements modern features such as
+most fundamental POSIX calls and it implements modern features such as
 demand-paging and virtual filesystem layer. Different from most other posix-like
 systems, it is based on a microkernel design. This makes it possible to use it
 also as a base for implementing or running other operating systems. It has a
diff --git a/include/l4/api/thread.h b/include/l4/api/thread.h
index 7aaf2a4..8ae82dd 100644
--- a/include/l4/api/thread.h
+++ b/include/l4/api/thread.h
@@ -1,9 +1,22 @@
 #ifndef __THREAD_H__
 #define __THREAD_H__
 
-#define THREAD_CREATE		0
-#define THREAD_RUN		1
-#define THREAD_SUSPEND		2
-#define THREAD_RESUME		3
+#define THREAD_FLAGS_MASK		0x00F0
+
+/* Create new thread, copy given space */
+#define THREAD_CREATE_COPYSPC		0x0010
+
+/* Create new thread and new space */
+#define THREAD_CREATE_NEWSPC		0x0020
+
+/* Create new thread, use given space */
+#define THREAD_CREATE_SAMESPC		0x0030
+
+
+#define THREAD_ACTION_MASK		0x000F
+#define THREAD_CREATE			0x0000
+#define THREAD_RUN			0x0001
+#define THREAD_SUSPEND			0x0002
+#define THREAD_RESUME			0x0003
 
 #endif /* __THREAD_H__ */
diff --git a/include/l4/arch/arm/v5/mm.h b/include/l4/arch/arm/v5/mm.h
index 9eb5c81..753005f 100644
--- a/include/l4/arch/arm/v5/mm.h
+++ b/include/l4/arch/arm/v5/mm.h
@@ -136,6 +136,7 @@ typedef struct fault_kdata {
 void add_section_mapping_init(unsigned int paddr, unsigned int vaddr,
 			      unsigned int size, unsigned int flags);
 
+pgd_table_t *copy_page_tables(pgd_table_t *from);
 void remap_as_pages(void *vstart, void *vend);
 
 void relocate_page_tables(void);
diff --git a/src/api/thread.c b/src/api/thread.c
index 0fd717d..9ce9391 100644
--- a/src/api/thread.c
+++ b/src/api/thread.c
@@ -10,6 +10,7 @@
 #include <l4/generic/tcb.h>
 #include <l4/lib/idpool.h>
 #include <l4/generic/pgalloc.h>
+#include INC_ARCH(mm.h)
 
 int sys_thread_switch(struct syscall_args *regs)
 {
@@ -59,51 +60,47 @@ int thread_start(struct task_ids *ids)
 	return -EINVAL;
 }
 
+
 /*
- * Creates a thread, with a new thread id, and depending on whether the space
- * id exists, either adds it to an existing space or creates a new space.
- *
- * NOTE: Add: Depending on whether the thread id exists, it creates a new space
- * copying the space of that thread id.
- *
- * For example:
- *      thread id = inval, space id = inval, -> new thread, new space.
- *      thread id = x, space id = inval, -> new thread, new space, copying space of x
- *      thread id = inval, space id = x, -> new thread, use space x.
+ * Creates a thread, with a new thread id, and depending on the flags,
+ * either creates a new space, uses the same space as another thread,
+ * or creates a new space copying the space of another thread. These
+ * are respectively used when creating a brand new task, creating a
+ * new thread in an existing address space, or forking a task.
  */
-int thread_create(struct task_ids *ids)
+int thread_create(struct task_ids *ids, unsigned int flags)
 {
 	struct ktcb *task, *new = (struct ktcb *)zalloc_page();
+	flags &= THREAD_FLAGS_MASK;
 
-	/* Visit all tasks to see if space ids match. */
-	list_for_each_entry(task, &global_task_list, task_list) {
-		/* Space ids match, can use existing space */
-		if (task->spid == ids->spid) {
-			BUG(); /* This is untested yet. */
-			goto spc_found;
-		}
-	}
+	if (flags == THREAD_CREATE_NEWSPC) {
+		/* Allocate new pgd and copy all kernel areas */
+		new->pgd = alloc_pgd();
+		copy_pgd_kern_all(new->pgd);
 
-	/* No existing space with such id. Creating a new address space */
-	new->pgd = alloc_pgd();
-
-	/* Copies all bits that are fixed for all tasks. */
-	copy_pgd_kern_all(new->pgd);
-
-	/* Get new space id */
-	if (ids->spid == TASK_ID_INVALID)
-		ids->spid = id_new(space_id_pool);
-	else
+		/* New space id, or requested id if available */
 		if ((ids->spid = id_get(space_id_pool, ids->spid)) < 0)
 			ids->spid = id_new(space_id_pool);
-
-spc_found:
-	/* Get a new thread id */
-	if (ids->tid == TASK_ID_INVALID)
+	} else {
+		/* Existing space will be used, find it from all tasks */
+		list_for_each_entry(task, &global_task_list, task_list) {
+			/* Space ids match, can use existing space */
+			if (task->spid == ids->spid) {
+				if (flags == THREAD_CREATE_SAMESPC)
+					new->pgd = task->pgd;
+				else
+					new->pgd = copy_page_tables(task->pgd);
+				goto out;
+			}
+		}
+		printk("Could not find given space, is "
+		       "SAMESPC/COPYSPC the right flag?\n");
+		BUG();
+	}
+out:
+	/* New thread id, or requested id if available */
+	if ((ids->tid = id_get(thread_id_pool, ids->tid)) < 0)
 		ids->tid = id_new(thread_id_pool);
-	else
-		if ((ids->tid = id_get(thread_id_pool, ids->tid)) < 0)
-			ids->tid = id_new(thread_id_pool);
 
 	/* Set all ids */
 	set_task_ids(new, ids);
@@ -128,14 +125,14 @@ spc_found:
  */
 int sys_thread_control(struct syscall_args *regs)
 {
-	u32 *reg = (u32 *)regs;
-	unsigned int action = reg[0];
-	struct task_ids *ids = (struct task_ids *)reg[1];
 	int ret = 0;
+	u32 *reg = (u32 *)regs;
+	unsigned int flags = reg[0];
+	struct task_ids *ids = (struct task_ids *)reg[1];
 
-	switch (action) {
+	switch (flags & THREAD_ACTION_MASK) {
 	case THREAD_CREATE:
-		ret = thread_create(ids);
+		ret = thread_create(ids, flags);
 		break;
 	case THREAD_RUN:
 		ret = thread_start(ids);
diff --git a/src/arch/arm/v5/mm.c b/src/arch/arm/v5/mm.c
index 231b76b..e3ca3af 100644
--- a/src/arch/arm/v5/mm.c
+++ b/src/arch/arm/v5/mm.c
@@ -359,6 +359,42 @@ void remove_mapping(unsigned long vaddr)
 	remove_mapping_pgd(vaddr, current->pgd);
 }
 
+/*
+ * Returns a newly allocated pgd copied from @from, in which every coarse
+ * (pmd) entry points at a fresh copy of the original pmd. Used by fork.
+ */
+pgd_table_t *copy_page_tables(pgd_table_t *from)
+{
+	pmd_table_t *pmd, *orig;
+	pgd_table_t *pgd;
+
+	/* Allocate and copy pgd */
+	pgd = alloc_pgd();
+	memcpy(pgd, from, sizeof(pgd_table_t));
+
+	/* Allocate and copy all valid pmds */
+	for (int i = 0; i < PGD_ENTRY_TOTAL; i++) {
+		/* Does this pgd entry refer to a coarse (pmd) table? */
+		if ((pgd->entry[i] & PGD_TYPE_MASK) == PGD_TYPE_COARSE) {
+			/* Allocate new pmd */
+			pmd = alloc_pmd();
+
+			/* Find original pmd */
+			orig = (pmd_table_t *)
+				phys_to_virt((pgd->entry[i] &
+				PGD_COARSE_ALIGN_MASK));
+
+			/* Copy original to new */
+			memcpy(pmd, orig, sizeof(pmd_table_t));
+
+			/* Replace original pmd entry in pgd with new */
+			pgd->entry[i] = (pgd_t)virt_to_phys(pmd);
+			pgd->entry[i] |= PGD_TYPE_COARSE;
+		}
+	}
+	BUG();	/* NOTE(review): unconditional; presumably marks this path untested */
+	return pgd;
+}
 
 extern pmd_table_t *pmd_array;
 
diff --git a/tasks/mm0/include/task.h b/tasks/mm0/include/task.h
index 89c878c..d7aaac1 100644
--- a/tasks/mm0/include/task.h
+++ b/tasks/mm0/include/task.h
@@ -87,6 +87,7 @@ struct tcb *find_task(int tid);
 struct initdata;
 void init_pm(struct initdata *initdata);
 
+struct tcb *task_create(struct task_ids *ids, unsigned int flags);
 int send_task_data(l4id_t requester);
 
 #endif /* __TASK_H__ */
diff --git a/tasks/mm0/include/vm_area.h b/tasks/mm0/include/vm_area.h
index a919f21..8f97b20 100644
--- a/tasks/mm0/include/vm_area.h
+++ b/tasks/mm0/include/vm_area.h
@@ -122,7 +122,7 @@ struct vm_pager {
 struct vm_object {
 	int npages;		    /* Number of pages in memory */
 	int refcnt;		    /* Number of shadows (or vmas) that refer */
-	struct list_head shadowers; /* List of vm objects that shadow this one */
+	struct list_head shadowers; /* List of links to the vm object that shadows this one */
 	struct vm_object *orig_obj; /* Original object that this one shadows */
 	unsigned int flags;	    /* Defines the type and flags of the object */
 	struct list_head list;	    /* List of all vm objects in memory */
@@ -142,7 +142,13 @@ struct vm_file {
 /* To create per-vma vm_object lists */
 struct vm_obj_link {
 	struct list_head list;
-	struct list_head shref;	/* Ref to shadowers by original objects */
+
+	/*
+	 * Ref to shadowers by original objects. This could be in the shadow
+	 * object itself, but then we would not be able to reach its link
+	 * when trying to free it.
+	 */
+	struct list_head shref;
 	struct vm_object *obj;
 };
 
@@ -218,6 +224,9 @@ struct page *task_virt_to_page(struct tcb *t, unsigned long virtual);
 int validate_task_range(struct tcb *t, unsigned long start,
 			unsigned long end, unsigned int vmflags);
 
+/* Changes all shadows and their ptes to read-only */
+int vm_freeze_shadows(struct tcb *task);
+
 /* Main page fault entry point */
 int page_fault_handler(l4id_t tid, fault_kdata_t *fkdata);
 
diff --git a/tasks/mm0/src/clone.c b/tasks/mm0/src/clone.c
index c88b16c..58492c2 100755
--- a/tasks/mm0/src/clone.c
+++ b/tasks/mm0/src/clone.c
@@ -4,41 +4,79 @@
  * Copyright (C) 2008 Bahadir Balban
  */
 #include <syscalls.h>
+#include <vm_area.h>
+#include <task.h>
 
-int copy_tcb(struct tcb *p, struct tcb *c)
+int copy_vmas(struct tcb *to, struct tcb *from)
 {
-	/* Copy program segments, file descriptors, vm areas */
+	struct vm_area *vma, *new;
+	struct vm_obj_link *vmo_link, *new_link;
+
+	list_for_each_entry(vma, &from->vm_area_list, list) {
+
+		/* Create a new vma */
+		new = vma_new(vma->pfn_start, vma->pfn_end - vma->pfn_start,
+			      vma->flags, vma->file_offset);
+
+		/*
+		 * Populate it with links to every object that the original
+		 * vma is linked to. Note, that we don't copy vm objects but
+		 * just the links to them, because vm objects are not
+		 * per-process data.
+		 */
+
+		/* Get the first object, either original file or a shadow */
+		if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
+			printf("%s:%s: No vm object in vma!\n",
+			       __TASKNAME__, __FUNCTION__);
+			BUG();
+		}
+		/* Create a new link */
+		new_link = vm_objlink_create();
+
+		/*
+		 * TODO: Copy all fields from the original link to new_link.
+		 */
+	}
 }
 
-/*
- * Sets all r/w shadow objects as read-only for the process
- * so that copy-on-write incidents cause read faults.
- */
-int vm_freeze_shadows(struct tcb *t)
+int copy_tcb(struct tcb *to, struct tcb *from)
 {
-	/* Make all shadows read-only */
+	/* Copy program segments, file descriptors, vm areas */
+	to->start = from->start;
+	to->end = from->end;
+	to->text_start = from->text_start;
+	to->text_end = from->text_end;
+	to->data_start = from->data_start;
+	to->data_end = from->data_end;
+	to->bss_start = from->bss_start;
+	to->bss_end = from->bss_end;
+	to->stack_start = from->stack_start;
+	to->stack_end = from->stack_end;
+	to->heap_start = from->heap_start;
+	to->heap_end = from->heap_end;
+	to->env_start = from->env_start;
+	to->env_end = from->env_end;
+	to->args_start = from->args_start;
+	to->args_end = from->args_end;
+	to->map_start = from->map_start;
+	to->map_end = from->map_end;
 
-	/*
-	 * Make all writeable shadow entries
-	 * in the page table as read-only
-	 */
+	/* UTCB ??? */
+	BUG();
+
+	/* Copy all vm areas */
+	copy_vmas(to, from);
+
+	/* Copy all file descriptors */
+	memcpy(to->fd, from->fd,
+	       TASK_FILES_MAX * sizeof(struct file_descriptor));
 }
 
 int do_fork(struct tcb *parent)
 {
 	struct tcb *child;
-
-	/* Make all parent shadows read only */
-	vm_freeze_shadows(parent);
-
-	/* Create a new L4 thread with new space */
-	l4_thread_create(parent);
-
-	/* Create a new local tcb */
-	child = tcb_alloc_init();
-
-	/* Copy parent tcb to child */
-	copy_tcb(struct tcb *parent, struct tcb *child);
+	struct task_ids ids = { .tid = TASK_ID_INVALID, .spid = TASK_ID_INVALID };
 
 	/*
 	 * Allocate and copy parent pgd + all pmds to child.
@@ -54,6 +92,16 @@ int do_fork(struct tcb *parent)
 	 * every one of them will have to fault on frozen shadows individually.
 	 */
 
+	/* Make all shadows in this task read-only */
+	vm_freeze_shadows(parent);
+
+	/* Create a new L4 thread with parent's page tables copied */
+	ids.spid = parent->spid;
+	child = task_create(&ids, THREAD_CREATE_COPYSPC);
+
+	/* Copy parent tcb to child */
+	copy_tcb(child, parent);
+
 	/* FIXME: Need to copy parent register values to child ??? */
 
 	/* Notify fs0 about forked process */
diff --git a/tasks/mm0/src/fault.c b/tasks/mm0/src/fault.c
index c3fe11a..9e28989 100644
--- a/tasks/mm0/src/fault.c
+++ b/tasks/mm0/src/fault.c
@@ -19,6 +19,12 @@
 #include <shm.h>
 #include <file.h>
 
+/* Returns page p's virtual address in vma; assumes p->offset counts pages from vma start -- TODO confirm */
+unsigned long vma_page_to_virtual(struct vm_area *vma, struct page *p)
+{
+	return __pfn_to_addr(vma->pfn_start + p->offset);
+}
+
 unsigned long fault_to_file_offset(struct fault_data *fault)
 {
 	/* Fault's offset in its vma */
@@ -53,7 +59,7 @@ struct vm_obj_link *vma_next_link(struct list_head *link,
 		return list_entry(link->next, struct vm_obj_link, list);
 }
 
-/* Unlinks obj_link from its vma and deletes it but keeps the object. */
+/* Unlinks orig_link from its vma and deletes it but keeps the object. */
 int vma_drop_link(struct vm_obj_link *shadower_link,
 		  struct vm_obj_link *orig_link)
 {
@@ -215,6 +221,7 @@ struct vm_obj_link *vma_create_shadow(void)
 	struct vm_object *vmo;
 	struct vm_obj_link *vmo_link;
 
+	/* FIXME: Why not use vm_objlink_create() ??? */
 	if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
 		return 0;
 
@@ -507,6 +514,60 @@ out_success:
 	return 0;
 }
 
+/*
+ * Makes the first writeable shadow object of every private vma in @task
+ * read-only, and remaps its cached pages read-only, so that writes after
+ * a fork() operation cause copy-on-write incidents. Always returns 0.
+ */
+int vm_freeze_shadows(struct tcb *task)
+{
+	unsigned long virtual;
+	struct vm_area *vma;
+	struct vm_obj_link *vmo_link;
+	struct vm_object *vmo;
+	struct page *p;
+
+	list_for_each_entry(vma, &task->vm_area_list, list) {
+
+		/* Shared vmas don't have shadows */
+		if (vma->flags & VMA_SHARED)
+			continue;
+
+		/* Visit each object link in order, so that the walk */
+		/* ends even when no writeable shadow is present. */
+		list_for_each_entry(vmo_link, &vma->vm_obj_list, list) {
+			vmo = vmo_link->obj;
+
+			/* Is this a writeable shadow? */
+			if ((vmo->flags & VM_OBJ_SHADOW) &&
+			    (vmo->flags & VM_WRITE)) {
+
+				/* Make the object read only */
+				vmo->flags &= ~VM_WRITE;
+				vmo->flags |= VM_READ;
+
+				/*
+				 * Make all pages on it read-only
+				 * in the page tables.
+				 */
+				list_for_each_entry(p, &vmo->page_cache, list) {
+
+					/* Find virtual address of each page */
+					virtual = vma_page_to_virtual(vma, p);
+
+					/* Map the page as read-only */
+					l4_map((void *)page_to_phys(p),
+					       (void *)virtual,
+					       MAP_USR_RO_FLAGS, task->tid);
+				}
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
 #if 0
 /*
  * Old function, likely to be ditched.
diff --git a/tasks/mm0/src/mmap.c b/tasks/mm0/src/mmap.c
index 6bddfcc..4059fc7 100644
--- a/tasks/mm0/src/mmap.c
+++ b/tasks/mm0/src/mmap.c
@@ -346,8 +346,7 @@ int sys_munmap(l4id_t sender, void *vaddr, unsigned long size)
 }
 
 struct vm_area *vma_new(unsigned long pfn_start, unsigned long npages,
-			unsigned int flags,  unsigned long file_offset,
-			struct vm_file *mapfile)
+			unsigned int flags, unsigned long file_offset)
 {
 	struct vm_area *vma;
 
@@ -526,8 +525,7 @@ int do_mmap(struct vm_file *mapfile, unsigned long file_offset,
 	}
 
 	/* For valid regions that aren't allocated by us, create the vma. */
-	if (!(new = vma_new(__pfn(map_address), npages, flags, file_offset,
-			    mapfile)))
+	if (!(new = vma_new(__pfn(map_address), npages, flags, file_offset)))
 		return -ENOMEM;
 
 	/* Attach the file as the first vm object of this vma */
diff --git a/tasks/mm0/src/task.c b/tasks/mm0/src/task.c
index 643ac0f..5b21ea6 100644
--- a/tasks/mm0/src/task.c
+++ b/tasks/mm0/src/task.c
@@ -76,13 +76,13 @@ struct tcb *tcb_alloc_init(void)
 }
 
 
-struct tcb *task_create(struct task_ids *ids)
+struct tcb *task_create(struct task_ids *ids, unsigned int flags)
 {
 	struct tcb *task;
 	int err;
 
 	/* Create the thread structures and address space */
-	if ((err = l4_thread_control(THREAD_CREATE, ids)) < 0) {
+	if ((err = l4_thread_control(THREAD_CREATE | flags, ids)) < 0) {
 		printf("l4_thread_control failed with %d.\n", err);
 		return PTR_ERR(err);
 	}
@@ -262,7 +262,7 @@ int task_exec(struct vm_file *f, unsigned long task_region_start,
 	struct tcb *task;
 	int err;
 
-	if (IS_ERR(task = task_create(ids)))
+	if (IS_ERR(task = task_create(ids, THREAD_CREATE_NEWSPC)))
 		return (int)task;
 
 	if ((err = task_setup_regions(f, task, task_region_start,