/*
 * mmap/munmap and friends.
 *
 * Copyright (C) 2007 Bahadir Balban
 */
#include
#include
#include INC_API(errno.h)
#include
#include
#include
#include
#include

#if 0
/* TODO: This is to be implemented when fs0 is ready. */
int do_msync(void *addr, unsigned long size, unsigned int flags,
             struct tcb *task)
{
        // unsigned long npages = __pfn(size);
        struct vm_area *vma = find_vma((unsigned long)addr,
                                       &task->vm_area_list);

        if (!vma)
                return -EINVAL;

        /* Must check if this is a shadow copy or not */
        if (vma->flags & VMA_COW) {
                ; /* ... Fill this in. ... */
        }

        /*
         * TODO:
         * Flush the vma's pages back to their file. Perhaps add a dirty bit
         * to the vma so that this can be completely avoided for clean vmas?
         * For anon pages this is the swap file. For real file-backed pages
         * it's the real file. However, this can't be fully implemented yet,
         * since we don't have FS0 yet.
         */
        return 0;
}

/*
 * This releases a physical page struct from its owner and
 * frees the page back to the page allocator.
 */
int page_release(struct page *page)
{
        spin_lock(&page->lock);
        page->refcnt--;
        BUG_ON(page->refcnt < -1);
        if (page->refcnt == -1) {
                /* Unlink the page from its owner's list */
                list_del_init(&page->list);

                /* Zero out the fields */
                page->owner = 0;
                page->flags = 0;
                page->f_offset = 0;
                page->virtual = 0;

                /*
                 * No refs to the page are left, and since every physical
                 * memory page comes from the page allocator, we return it.
                 */
                free_page((void *)page_to_phys(page));
        }
        spin_unlock(&page->lock);

        return 0;
}

/*
 * Freeing and unmapping of vma pages:
 *
 * For a vma that is about to be split, shrunk or destroyed, this function
 * finds the physical pages in memory that back the vma, reduces their
 * refcounts, frees any pages left unused back to the physical page
 * allocator, and finally unmaps the corresponding virtual addresses from
 * the unmapper task's address space. This sequence is roughly a rewinding
 * of the actions the page fault handler took when the process faulted on
 * the vma.
 */
int vma_release_pages(struct vm_area *vma, struct tcb *task,
                      unsigned long pfn_start, unsigned long pfn_end)
{
        unsigned long f_start, f_end;
        struct page *page, *n;

        /* The released range must lie within the vma */
        BUG_ON(vma->pfn_start > pfn_start);
        BUG_ON(vma->pfn_end < pfn_end);

        /*
         * Find the file offsets of the range to be freed.
         * Like pfn_end, f_end is exclusive.
         */
        f_start = vma->f_offset + pfn_start - vma->pfn_start;
        f_end = vma->f_offset + pfn_end - vma->pfn_start;

        list_for_each_entry_safe(page, n, &vma->owner->page_cache_list, list) {
                if (page->f_offset >= f_start && page->f_offset < f_end) {
                        l4_unmap((void *)virtual(page), 1, task->tid);
                        page_release(page);
                }
        }

        return 0;
}

int vma_unmap(struct vm_area **actual, struct vm_area **split,
              unsigned long pfn_start, unsigned long pfn_end,
              struct tcb *task);
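/*
 * Illustration (a sketch, not part of the original code): the refcount
 * convention page_release() assumes. A page's refcnt rests at -1 while it
 * has no users, so the first mapping takes it to 0 and the page is freed
 * on the transition back to -1. alloc_page() and phys_to_page() are
 * hypothetical counterparts of the free_page()/page_to_phys() calls above,
 * used here only for the example.
 *
 *      struct page *page = phys_to_page(alloc_page()); // unused, refcnt == -1
 *
 *      page->refcnt++;         // first user maps it in, refcnt == 0
 *      page_release(page);     // last user gone, refcnt == -1, page freed
 */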
/*
 * This is called by every vma modifier function in vma_unmap(). It in turn
 * calls vma_unmap() recursively so that the shadow vmas get modified the
 * same way the actual vmas do. Only COW vmas need this recursion, and the
 * maximum recursion depth is one, since only one level of shadows exists.
 */
int vma_unmap_shadows(struct vm_area *vma, struct tcb *task,
                      unsigned long pfn_start, unsigned long pfn_end)
{
        struct vm_area *shadow, *n;

        /* Now do all the shadows */
        list_for_each_entry_safe(shadow, n, &vma->shadow_list, list) {
                BUG_ON(!(vma->flags & VMA_COW));
                if (shadow->pfn_start >= pfn_start &&
                    shadow->pfn_end <= pfn_end) {
                        struct vm_area *split_shadow;

                        /* This may shrink, destroy or split the shadow */
                        vma_unmap(&shadow, &split_shadow, pfn_start,
                                  pfn_end, task);
                        if (shadow && split_shadow)
                                list_add_tail(&split_shadow->list,
                                              &shadow->list);

                        /* FIXME: Is this all that must be done here? */
                        BUG();
                }
        }

        return 0;
}

/* TODO: vma_destroy/shrink/split should also handle swap file modification */

/* Frees and unlinks a vma from its list. TODO: Add list locking */
int vma_destroy(struct vm_area *vma, struct tcb *task)
{
        struct vm_area *shadow, *n;

        /* Release the vma pages */
        vma_release_pages(vma, task, vma->pfn_start, vma->pfn_end);

        /* Free all shadows, if any. */
        list_for_each_entry_safe(shadow, n, &vma->shadow_list, list) {
                /* Release all shadow pages */
                vma_release_pages(shadow, task, shadow->pfn_start,
                                  shadow->pfn_end);
                list_del(&shadow->list);
                kfree(shadow);
        }

        /* Unlink and free the vma itself */
        list_del(&vma->list);
        if (kfree(vma) < 0)
                BUG();

        return 0;
}
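/*
 * Illustration (a sketch, not part of the original code): the geometry
 * vma_split() below produces. Splitting a vma spanning pfns [10, 30) by
 * the unmap range [15, 20) leaves the original vma covering [10, 15) and
 * returns a new vma covering [20, 30), whose f_offset is advanced by the
 * distance of its new start from the old start:
 *
 *      before: [10 ............................. 30)  f_offset = F
 *      unmap:           [15 .... 20)
 *      after:  [10 .... 15)          [20 ....... 30)  F and F + 10
 */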
/* This splits a vma. The splitter region must be in the *middle* of the original vma */
struct vm_area *vma_split(struct vm_area *vma, struct tcb *task,
                          unsigned long pfn_start, unsigned long pfn_end)
{
        struct vm_area *new, *shadow, *n;

        /* Allocate an uninitialised vma first */
        if (!(new = vma_new(0, 0, 0, 0, 0)))
                return 0;

        /*
         * Some sanity checks to show that the splitter range does end up
         * producing two smaller vmas.
         */
        BUG_ON(vma->pfn_start >= pfn_start || vma->pfn_end <= pfn_end);

        /* Release the pages before modifying the original vma */
        vma_release_pages(vma, task, pfn_start, pfn_end);

        new->pfn_end = vma->pfn_end;
        new->pfn_start = pfn_end;
        new->f_offset = vma->f_offset + new->pfn_start - vma->pfn_start;
        vma->pfn_end = pfn_start;
        new->flags = vma->flags;
        new->owner = vma->owner;

        /*
         * Modify the shadows accordingly first. They may split, shrink,
         * get completely destroyed, or stay untouched.
         */
        vma_unmap_shadows(vma, task, pfn_start, pfn_end);

        /*
         * Now divide the modified shadow list between the two vmas:
         * if the file was COW and its vma has split, `new' holds a valid
         * vma, so each shadow must be moved to whichever of the two vmas
         * it belongs to.
         */
        list_for_each_entry_safe(shadow, n, &vma->shadow_list, list) {
                BUG_ON(!(vma->flags & VMA_COW));
                BUG_ON(!(new->flags & VMA_COW));
                if (shadow->pfn_start >= new->pfn_start &&
                    shadow->pfn_end <= new->pfn_end) {
                        list_del_init(&shadow->list);
                        list_add(&shadow->list, &new->shadow_list);
                } else
                        BUG_ON(!(shadow->pfn_start >= vma->pfn_start &&
                                 shadow->pfn_end <= vma->pfn_end));
        }

        return new;
}

/* This shrinks the vma from *one* end only, either start or end */
int vma_shrink(struct vm_area *vma, struct tcb *task,
               unsigned long pfn_start, unsigned long pfn_end)
{
        unsigned long diff;

        BUG_ON(pfn_start >= pfn_end);

        /* FIXME: Shadows are currently buggy - TBD */
        if (!list_empty(&vma->shadow_list)) {
                BUG();
                vma_swapfile_realloc(vma, pfn_start, pfn_end);
                return 0;
        }

        /* Release the pages before modifying the original vma */
        vma_release_pages(vma, task, pfn_start, pfn_end);

        /* Unmapped region is at the vma start: shrink from the beginning */
        if (pfn_start == vma->pfn_start && pfn_end < vma->pfn_end) {
                diff = pfn_end - pfn_start;
                vma->f_offset += diff;
                vma->pfn_start = pfn_end;
        /* Unmapped region is at the vma end: shrink from the end */
        } else if (pfn_end == vma->pfn_end && pfn_start > vma->pfn_start) {
                vma->pfn_end = pfn_start;
        } else
                BUG();

        return vma_unmap_shadows(vma, task, pfn_start, pfn_end);
}

/*
 * Unmaps the given region from a vma. Depending on the region and vma range,
 * this may result in shrinking, splitting or destruction of the vma.
 */
int vma_unmap(struct vm_area **actual, struct vm_area **split,
              unsigned long pfn_start, unsigned long pfn_end,
              struct tcb *task)
{
        struct vm_area *vma = *actual;
        struct vm_area *vma_new = 0;

        /* Split needed? */
        if (vma->pfn_start < pfn_start && vma->pfn_end > pfn_end) {
                if (!(vma_new = vma_split(vma, task, pfn_start, pfn_end)))
                        return -ENOMEM;
                list_add_tail(&vma_new->list, &vma->list);
        /* Shrink needed? */
        } else if (((vma->pfn_start == pfn_start) && (vma->pfn_end > pfn_end)) ||
                   ((vma->pfn_start < pfn_start) && (vma->pfn_end == pfn_end)))
                vma_shrink(vma, task, pfn_start, pfn_end);
        /* Destroy needed? */
        else if ((vma->pfn_start >= pfn_start) && (vma->pfn_end <= pfn_end)) {
                /* NOTE: The vma can't be referred to after this point. */
                vma_destroy(vma, task);
                vma = 0;
        } else
                BUG();

        /* Update the caller's pointers */
        *actual = vma;
        *split = vma_new;

        return 0;
}
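/*
 * Illustration (a sketch, not part of the original code): how the three
 * vma_unmap() cases above fall out of the range comparisons, for a vma
 * spanning pfns [10, 20):
 *
 *      vma_unmap(&vma, &split, 12, 15, task);  // split: [10,12) and [15,20)
 *      vma_unmap(&vma, &split, 10, 15, task);  // shrink: [15,20) remains
 *      vma_unmap(&vma, &split, 10, 20, task);  // destroy: vma set to 0
 */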
/* Unmaps the given address range from its vma and releases those pages. */
int do_munmap(void *vaddr, unsigned long size, struct tcb *task)
{
        unsigned long npages = __pfn(size);
        unsigned long pfn_start = __pfn(vaddr);
        unsigned long pfn_end = pfn_start + npages;
        struct vm_area *vma, *vma_new = 0;
        int err;

        /* Check if any such vma exists */
        if (!(vma = find_vma((unsigned long)vaddr, &task->vm_area_list)))
                return -EINVAL;

        /*
         * If the end of the range falls outside the vma that contains the
         * start address, we ignore the rest and take the vma's end as the
         * end. TODO: Find out how POSIX handles this.
         */
        if (pfn_end > vma->pfn_end) {
                printf("%s: %s: Warning, unmap end 0x%lx beyond vma range. "
                       "Ignoring.\n", __TASKNAME__, __FUNCTION__,
                       __pfn_to_addr(pfn_end));
                pfn_end = vma->pfn_end;
        }

        if ((err = vma_unmap(&vma, &vma_new, pfn_start, pfn_end, task)) < 0)
                return err;

#if 0
mod_phys_pages:
        /* The stage where the actual pages are unmapped from the page tables */
pgtable_unmap:
        /*
         * TODO:
         * - Find out if the vma is cow, and contains shadow vmas.
         * - Remove and free shadow vmas or the real vma, or shrink them if
         *   applicable.
         * - Free the swap file segment for the vma if the vma is private (cow).
         * - Reduce the refcount for the in-memory pages.
         * - If the refcount is zero (they could be shared!), either add the
         *   pages to some page cache, or, simpler the better, free the actual
         *   pages back to the page allocator.
         * - l4_unmap() the corresponding virtual region from the page tables.
         *
         * -- These are all done --
         */
#endif
        return 0;
}
#endif

/* TODO: Stub; the real implementation above is disabled. */
int do_munmap(void *vaddr, unsigned long size, struct tcb *task)
{
        return 0;
}

int sys_munmap(l4id_t sender, void *vaddr, unsigned long size)
{
        struct tcb *task;

        BUG_ON(!(task = find_task(sender)));

        return do_munmap(vaddr, size, task);
}

struct vm_area *vma_new(unsigned long pfn_start, unsigned long npages,
                        unsigned int flags, unsigned long file_offset,
                        struct vm_file *mapfile)
{
        struct vm_area *vma;
        struct vm_obj_link *obj_link;

        /* Allocate the new area */
        if (!(vma = kzalloc(sizeof(struct vm_area))))
                return 0;

        /* Allocate the vm object link */
        if (!(obj_link = kzalloc(sizeof(struct vm_obj_link)))) {
                kfree(vma);
                return 0;
        }

        INIT_LIST_HEAD(&vma->list);
        INIT_LIST_HEAD(&vma->vm_obj_list);
        vma->pfn_start = pfn_start;
        vma->pfn_end = pfn_start + npages;
        vma->flags = flags;
        vma->file_offset = file_offset;

        INIT_LIST_HEAD(&obj_link->list);
        INIT_LIST_HEAD(&obj_link->shref);
        obj_link->obj = &mapfile->vm_obj;
        list_add(&obj_link->list, &vma->vm_obj_list);

        return vma;
}

int vma_intersect(unsigned long pfn_start, unsigned long pfn_end,
                  struct vm_area *vma)
{
        /*
         * Two half-open ranges [start, end) overlap iff each one
         * starts before the other one ends.
         */
        if (pfn_start < vma->pfn_end && pfn_end > vma->pfn_start) {
                printf("%s: VMAs overlap.\n", __FUNCTION__);
                return 1;
        }
        return 0;
}
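/*
 * Illustration (a sketch, not part of the original code): vma_intersect()
 * compares half-open page ranges, so ranges that merely touch do not
 * overlap. For a vma spanning pfns [10, 20):
 *
 *      vma_intersect(5, 10, vma);      // 0: ends exactly at the vma start
 *      vma_intersect(5, 11, vma);      // 1: overlaps the first page
 *      vma_intersect(12, 14, vma);     // 1: lies inside the vma
 *      vma_intersect(20, 25, vma);     // 0: begins exactly at the vma end
 */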
/*
 * Searches for an empty space in the task's mmapable address region.
 */
unsigned long find_unmapped_area(unsigned long npages, struct tcb *task)
{
        unsigned long pfn_start = __pfn(task->map_start);
        unsigned long pfn_end = pfn_start + npages;
        struct vm_area *vma;

        if (npages > __pfn(task->map_end - task->map_start))
                return 0;

        /* If there are no vmas, the first map slot is available. */
        if (list_empty(&task->vm_area_list))
                return USER_AREA_START;

        /* First vma to check our range against */
        vma = list_entry(task->vm_area_list.next, struct vm_area, list);

        /* Search from the end of the task's data to the start of its stack */
        while (pfn_end <= __pfn(USER_AREA_END)) {
                /* If the range intersects a vma, skip past that vma */
                if (vma_intersect(pfn_start, pfn_end, vma)) {
                        /* Update the interval to the next available space */
                        pfn_start = vma->pfn_end;
                        pfn_end = pfn_start + npages;

                        /*
                         * Decision point: no more vmas left to check.
                         * Are we out of the task's map area?
                         */
                        if (vma->list.next == &task->vm_area_list) {
                                if (pfn_end > __pfn(USER_AREA_END))
                                        break;  /* Yes, fail */
                                else            /* No, success */
                                        return __pfn_to_addr(pfn_start);
                        }

                        /* Otherwise get the next vma entry */
                        vma = list_entry(vma->list.next,
                                         struct vm_area, list);
                        continue;
                }
                BUG_ON(pfn_start + npages > __pfn(USER_AREA_END));
                return __pfn_to_addr(pfn_start);
        }

        return 0;
}

/*
 * Maps the given file with the given flags at the given page offset into
 * the given task's address space, at the specified virtual memory address
 * and length.
 *
 * The actual paging in/out of the file from/into memory pages is handled
 * by the file's pager upon page faults.
 */
int do_mmap(struct vm_file *mapfile, unsigned long file_offset,
            struct tcb *task, unsigned long map_address,
            unsigned int flags, unsigned int npages)
{
        unsigned long map_pfn = __pfn(map_address);
        struct vm_area *new, *mapped;
        unsigned long file_npages;

        if (!mapfile) {
                if (flags & VMA_ANONYMOUS) {
                        mapfile = get_devzero();
                        file_offset = 0;
                } else
                        BUG();
        } else {
                /* Check that the mapped region lies within the file */
                file_npages = __pfn(page_align_up(mapfile->length));
                if (npages > file_npages - __pfn(file_offset)) {
                        printf("%s: Trying to map %u pages from page %lu, "
                               "but file length is %lu pages.\n",
                               __FUNCTION__, npages, __pfn(file_offset),
                               file_npages);
                        return -EINVAL;
                }
        }

        /* Check for an invalid page count */
        if (npages == 0) {
                printf("Trying to map %u pages.\n", npages);
                return -EINVAL;
        }
        if (npages > __pfn(task->stack_start - task->data_end)) {
                printf("Trying to map too many pages: %u\n", npages);
                return -ENOMEM;
        }

        /* Check for an invalid map address */
        if (map_address == 0 || map_address < USER_AREA_START ||
            map_address >= USER_AREA_END) {
                /* Get a new map address for a region of this size */
                if (!(map_address = find_unmapped_area(npages, task)))
                        return -ENOMEM;

                /* Create a new vma for the newly allocated address */
                if (!(new = vma_new(__pfn(map_address), npages, flags,
                                    file_offset, mapfile)))
                        return -ENOMEM;

                /* Success; add it to the list and return */
                goto out_success;
        }

        /*
         * FIXME: Currently we don't allow overlapping vmas. To be fixed
         * soon; we need to handle intersection, splitting, shrink/grow etc.
         */
        list_for_each_entry(mapped, &task->vm_area_list, list)
                BUG_ON(vma_intersect(map_pfn, map_pfn + npages, mapped));

        /* For valid regions that we didn't allocate ourselves, create the vma. */
        if (!(new = vma_new(__pfn(map_address), npages, flags,
                            file_offset, mapfile)))
                return -ENOMEM;

out_success:
        printf("%s: Mapping 0x%lx - 0x%lx\n", __FUNCTION__, map_address,
               map_address + npages * PAGE_SIZE);
        list_add(&new->list, &task->vm_area_list);
        return 0;
}

/* mmap system call implementation */
int sys_mmap(l4id_t sender, void *start, size_t length, int prot,
             int flags, int fd, unsigned long pfn)
{
        unsigned long npages = __pfn(page_align_up(length));
        unsigned long base = (unsigned long)start;
        struct vm_file *file = 0;
        unsigned int vmflags = 0;
        struct tcb *task;
        int err;

        BUG_ON(!(task = find_task(sender)));

        if ((fd < 0 && !(flags & MAP_ANONYMOUS)) || fd >= TASK_FILES_MAX)
                return -EINVAL;

        /* A null base is valid; an address is picked below. */
        if (base && (base < USER_AREA_START || base >= USER_AREA_END))
                return -EINVAL;

        /* Exclude the task's stack, text and data from its mmapable area */
        if (!base || base < task->map_start || base >= task->map_end) {
                if (flags & MAP_FIXED) /* It's fixed; we cannot satisfy it */
                        return -EINVAL;
                else
                        base = 0; /* Let do_mmap() pick an address */
        }

        /*
         * TODO:
         * Check that @start does not already have a mapping.
         * Check that the pfn + npages range is within the file range.
         * Check that the posix flags passed match those defined in vm_area.h.
         */
        if (flags & MAP_ANONYMOUS) {
                file = 0;
                vmflags |= VMA_ANONYMOUS;
        } else {
                file = task->fd[fd].vmfile;
        }

        if (flags & MAP_FIXED)
                vmflags |= VMA_FIXED;

        if (flags & MAP_PRIVATE)
                /* This means COW, if writeable. */
                vmflags |= VMA_PRIVATE;
        else    /* This also means COW, if writeable and anonymous */
                vmflags |= VMA_SHARED;

        if (prot & PROT_READ)
                vmflags |= VM_READ;
        if (prot & PROT_WRITE)
                vmflags |= VM_WRITE;
        if (prot & PROT_EXEC)
                vmflags |= VM_EXEC;

        if ((err = do_mmap(file, __pfn_to_addr(pfn), task, base,
                           vmflags, npages)) < 0)
                return err;

        return 0;
}

/* Sets the end of the data segment for the sender */
int sys_brk(l4id_t sender, void *ds_end)
{
        return 0;
}
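/*
 * Illustration (a sketch, not part of the original code): how a client's
 * anonymous mapping request would travel through the functions above. The
 * l4 message plumbing is elided; only the argument translation is shown.
 *
 *      sys_mmap(sender, 0, 4 * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * translates into:
 *
 *      do_mmap(0, 0, task, 0,
 *              VMA_ANONYMOUS | VMA_PRIVATE | VM_READ | VM_WRITE, 4);
 *
 * where do_mmap() substitutes get_devzero() for the null file, picks an
 * address with find_unmapped_area(4, task), and adds a 4-page vma to
 * task->vm_area_list. The actual pages appear later, when the task
 * page-faults on the region.
 */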