diff --git a/build/compile_typeinfo.sh b/build/compile_typeinfo.sh deleted file mode 100644 index ceef1d1..0000000 --- a/build/compile_typeinfo.sh +++ /dev/null @@ -1,7 +0,0 @@ -cd .. -for item in kernel/runtime/std/typeinfo/*.d; -do - echo "--> $item" - ldc -nodefaultlib -g -I. -Ikernel/runtime/. -code-model=kernel -c $item -odbuild/dsss_objs/G/. ;\ -done -cd build diff --git a/build/confs/x86.conf b/build/confs/x86.conf index 0d5b6b4..6793052 100644 --- a/build/confs/x86.conf +++ b/build/confs/x86.conf @@ -8,7 +8,7 @@ buildflags=-dc=ldc-xomb [../kernel/core/kmain.d] -buildflags=-dc=ldc-xomb -I.. +buildflags=-dc=ldc-xomb-32 -I.. # compile the assembly for the target @@ -19,29 +19,32 @@ echo ; \ echo Setting up Architecture Dependence: x86_64; \ echo '--> architecture.d';\ mkdir -p dsss_imports;\ +mkdir -p dsss_objs;\ +mkdir -p dsss_objs/G;\ +mkdir -p dsss_objs/O;\ +cp ../kernel/arch/x86/imports/compile_typeinfo.sh dsss_imports/.;\ cp ../kernel/arch/x86/imports/architecture.d dsss_imports/.;\ echo ; \ -echo Compiling Assembly for target: x86 ;\ +echo Compiling Assembly for target: x86_64 ;\ echo '--> boot.S';\ -yasm -o dsss_objs/G/kernel.arch.x86.boot.boot.o ../kernel/arch/x86/boot/boot.s -felf;\ -echo '--> load.S';\ -yasm -o dsss_objs/G/kernel.arch.x86.load.load.o ../kernel/arch/x86/boot/load.s -felf;\ -\ +yasm -felf32 -mx86 -o dsss_objs/O/kernel.arch.x86.boot.boot.o ../kernel/arch/x86/boot/boot.s;\ \ echo ; \ echo Compiling Kernel Runtime ; \ echo '--> kernel/runtime/object.d';\ -ldc -nodefaultlib -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/object.d -ofdsss_objs/G/kernel.runtime.object.o; \ +ldc -nodefaultlib -m32 -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/object.d -ofdsss_objs/G/kernel.runtime.object.o; \ echo '--> kernel/runtime/invariant.d';\ -ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/invariant.d -ofdsss_objs/G/kernel.runtime.invariant.o; \ +ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/invariant.d -ofdsss_objs/G/kernel.runtime.invariant.o; \ echo '--> kernel/runtime/std/typeinfo/*';\ -ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c `ls ../kernel/runtime/std/typeinfo/*.d` -oddsss_objs/G/. ;\ +sh ./dsss_imports/compile_typeinfo.sh;\ echo '--> kernel/runtime/dstubs.d';\ -ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/dstubs.d -ofdsss_objs/G/kernel.runtime.dstubs.o ;\ +ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/dstubs.d -ofdsss_objs/G/kernel.runtime.dstubs.o ;\ echo '--> kernel/runtime/util.d';\ -ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/util.d -ofdsss_objs/G/kernel.runtime.util.o ;\ +ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/util.d -ofdsss_objs/G/kernel.runtime.util.o ;\ echo '--> kernel/runtime/std/moduleinit.d';\ -ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/std/moduleinit.d -ofdsss_objs/G/kernel.runtime.std.moduleinit.o ;\ +ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/std/moduleinit.d -ofdsss_objs/G/kernel.runtime.std.moduleinit.o ;\ +echo '--> kernel/runtime/precision.d';\ +ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/precision.d -ofdsss_objs/G/kernel.runtime.precision.o ;\ \ echo ; \ echo Compiling Kernel Proper ; @@ -59,12 +62,12 @@ echo Creating Kernel Executable; \ echo '--> xomb';\ #llvm-ld -native -Xlinker=-nostdlib -Xlinker=-Tlinker.ld -Xlinker="-b elf64-x86-64" `ls dsss_objs/G/*.o` -o iso/boot/xomb ; \ #llvm-ld -nodefaultlib -g -I.. -I../kernel/runtime/. `ls dsss_objs/G/*.o` ;\ -ld -nostdlib -nodefaultlibs -b elf64-x86-64 -T ../kernel/arch/x86_64/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o`;\ +ld -nostdlib -nodefaultlibs -melf_i386 -belf32-i386 -T ../kernel/arch/x86/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o` `ls dsss_objs/O/*.o`;\ \ echo ;\ echo Creating Kernel Dump; \ echo '--> xomb.dump';\ -rm -f xomb.dump && x86_64-pc-elf-objdump -d -S -r iso/boot/xomb > xomb.dump;\ +rm -f xomb.dump && objdump -d -S -r iso/boot/xomb > xomb.dump;\ \ echo ;\ echo Compiling ISO; \ diff --git a/build/confs/x86_64.conf b/build/confs/x86_64.conf index 7d214c5..bfbe914 100644 --- a/build/confs/x86_64.conf +++ b/build/confs/x86_64.conf @@ -19,13 +19,17 @@ echo ; \ echo Setting up Architecture Dependence: x86_64; \ echo '--> architecture.d';\ mkdir -p dsss_imports;\ +mkdir -p dsss_objs;\ +mkdir -p dsss_objs/G;\ +mkdir -p dsss_objs/O;\ +cp ../kernel/arch/x86_64/imports/compile_typeinfo.sh dsss_imports/.;\ cp ../kernel/arch/x86_64/imports/architecture.d dsss_imports/.;\ echo ; \ echo Compiling Assembly for target: x86_64 ;\ -echo '--> boot.S';\ -yasm -o dsss_objs/G/kernel.arch.x86_64.boot.boot.o ../kernel/arch/x86_64/boot/boot.s -felf64;\ -echo '--> load.S';\ -yasm -o dsss_objs/G/kernel.arch.x86_64.load.load.o ../kernel/arch/x86_64/boot/load.s -felf64;\ +echo '--> boot.s';\ +yasm -o dsss_objs/O/kernel.arch.x86_64.boot.boot.o ../kernel/arch/x86_64/boot/boot.s -felf64;\ +echo '--> load.s';\ +yasm -o dsss_objs/O/kernel.arch.x86_64.load.load.o ../kernel/arch/x86_64/boot/load.s -felf64;\ \ \ echo ; \ @@ -35,7 +39,7 @@ ldc -nodefaultlib -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/run echo '--> kernel/runtime/invariant.d';\ ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/invariant.d -ofdsss_objs/G/kernel.runtime.invariant.o; \ echo '--> kernel/runtime/std/typeinfo/*';\ -sh ./compile_typeinfo.sh;\ +sh ./dsss_imports/compile_typeinfo.sh;\ echo '--> kernel/runtime/dstubs.d';\ ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/dstubs.d -ofdsss_objs/G/kernel.runtime.dstubs.o ;\ echo '--> kernel/runtime/util.d';\ @@ -59,7 +63,7 @@ echo Creating Kernel Executable; \ echo '--> xomb';\ #llvm-ld -native -Xlinker=-nostdlib -Xlinker=-Tlinker.ld -Xlinker="-b elf64-x86-64" `ls dsss_objs/G/*.o` -o iso/boot/xomb ; \ #llvm-ld -nodefaultlib -g -I.. -I../kernel/runtime/. `ls dsss_objs/G/*.o` ;\ -ld -nostdlib -nodefaultlibs -b elf64-x86-64 -T ../kernel/arch/x86_64/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o`;\ +ld -nostdlib -nodefaultlibs -b elf64-x86-64 -T ../kernel/arch/x86_64/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o` `ls dsss_objs/O/*.o`;\ \ echo ;\ echo Creating Kernel Dump; \ diff --git a/kernel/arch/x86/boot.s b/kernel/arch/x86/boot.s deleted file mode 100644 index 7181352..0000000 --- a/kernel/arch/x86/boot.s +++ /dev/null @@ -1,163 +0,0 @@ -; boot.s - -; entry is from bootloader - -section .text -bits 32 - -%include "defines.mac" - -; externs given by the linker script -extern _edata -extern _end - -; extern to the load.s -extern start64 -extern stack - -; define the starting point for this module -global start -global _start -start: -_start: - - ; Stash values for multiboot we won't touch until 64 bit mode - mov esi, ebx - mov edi, eax - - jmp start32 - - ; the multiboot header needs to be aligned at - ; a 32 bit boundary - align 4 - - multiboot_header: - - dd MULTIBOOT_HEADER_MAGIC - dd MULTIBOOT_HEADER_FLAGS - dd -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) - dd multiboot_header - dd _start - dd (_edata-KERNEL_VMA_BASE) - dd (_end-KERNEL_VMA_BASE) - dd _start - -; the 32 bit entry -global start32 -start32: - - ; disable interrupts - cli - - ; enable 64-bit page translation table entries - ; by setting CR4.PAE = 1. - ; - ; Paging is not enabled until long mode. - mov eax, cr4 - bts eax, 5 - mov cr4, eax - - ; Create long mode page table and init CR3 to - ; point to the base of the PML4 page table - mov eax, pml4_base - mov cr3, eax - - ; Enable Long mode and SYSCALL / SYSRET instructions - mov ecx, 0xC0000080 - rdmsr - bts eax, 8 - bts eax, 0 - wrmsr - - ; Load the 32 bit GDT - lgdt [pGDT32] - - ; Load the 32 bit IDT - ; lidt [pIDT32] - - ; establish a stack for 32 bit code - mov esp, (stack-KERNEL_VMA_BASE) + STACK_SIZE - - ; enable paging to activate long mode - mov eax, cr0 - bts eax, 31 - mov cr0, eax - - jmp CS_KERNEL:(start64-KERNEL_VMA_BASE) - -bits 64 -code64Jump: - jmp (start64-KERNEL_VMA_BASE) - - - - -; Data Structures Follow -bits 32 - -; 32 bit gdt - -align 4096 - -pGDT32: - dw GDT_END - GDT_TABLE - 1 - dq GDT_TABLE - KERNEL_VMA_BASE - -GDT_TABLE: - - dq 0x0000000000000000 ; Null Descriptor - dq 0x00cf9a000000ffff ; CS_KERNEL32 - dq 0x00af9a000000ffff,0 ; CS_KERNEL - dq 0x00af93000000ffff,0 ; DS_KERNEL - dq 0x00affa000000ffff,0 ; CS_USER - dq 0x00aff3000000ffff,0 ; DS_USER - dq 0,0 ; - dq 0,0 ; - dq 0,0 ; - dq 0,0 ; - - dq 0,0,0 ; Three TLS descriptors - dq 0x0000f40000000000 ; - -GDT_END: - - - - -; Temporary page tables - -; These assume linking to 0xFFFF800000000000 -align 4096 -pml4_base: - dq (pml3_base + 0x7) - times 255 dq 0 - dq (pml3_base + 0x7) - times 255 dq 0 - -align 4096 -pml3_base: - dq (pml2_base + 0x7) - times 511 dq 0 - -align 4096 -pml2_base: - %assign i 0 - %rep 25 - dq (pml1_base + i + 0x7) - %assign i i+4096 - %endrep - - times (512-25) dq 0 - -align 4096 -; 15 tables are described here -; this maps 40 MB from address 0x0 -; to an identity mapping -pml1_base: - %assign i 0 - %rep 512*25 - dq (i << 12) | 0x087 - %assign i i+1 - %endrep - - diff --git a/kernel/arch/x86/boot/boot.s b/kernel/arch/x86/boot/boot.s new file mode 100644 index 0000000..72a943b --- /dev/null +++ b/kernel/arch/x86/boot/boot.s @@ -0,0 +1,73 @@ +; boot.s + +; entry is from bootloader + +section .text +bits 32 + +; multiboot definitions +%define MULTIBOOT_HEADER_MAGIC 0x1BADB002 +%define MULTIBOOT_HEADER_FLAGS 0x00010003 + +; where is the kernel? +%define KERNEL_VMA_BASE 0x00100000 +%define KERNEL_LMA_BASE 0x00100000 + +; the gdt entry to use for the kernel +%define CS_KERNEL 0x10 +%define CS_KERNEL32 0x08 + +; externs given by the linker script +extern _edata +extern _end + +; extern to the load.s +extern start64 +extern stack + +; other definitions + +%define STACK_SIZE 0x4000 + +; define the starting point for this module +global start +global _start +start: +_start: + + ; Stash values for multiboot we won't touch until 64 bit mode + mov esi, ebx + mov edi, eax + + jmp start32 + + ; the multiboot header needs to be aligned at + ; a 32 bit boundary + align 4 + + multiboot_header: + + dd MULTIBOOT_HEADER_MAGIC + dd MULTIBOOT_HEADER_FLAGS + dd -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) + dd multiboot_header + dd _start + dd _edata + dd _end + dd _start + +; the 32 bit entry +global start32 +start32: + + ; disable interrupts + cli + +_loop: + jmp _loop + nop + nop + nop + nop + nop + nop diff --git a/kernel/arch/x86/defines.mac b/kernel/arch/x86/defines.mac deleted file mode 100644 index 0442163..0000000 --- a/kernel/arch/x86/defines.mac +++ /dev/null @@ -1,17 +0,0 @@ -; multiboot definitions -%define MULTIBOOT_HEADER_MAGIC 0x1BADB002 -%define MULTIBOOT_HEADER_FLAGS 0x00010003 - -; where is the kernel? -%define KERNEL_VMA_BASE 0xFFFF800000000000 -%define KERNEL_LMA_BASE 0x100000 - -; the gdt entry to use for the kernel -%define CS_KERNEL 0x10 -%define CS_KERNEL32 0x08 - -; other definitions - -%define STACK_SIZE 0x4000 - - diff --git a/kernel/arch/x86/imports/compile_typeinfo.sh b/kernel/arch/x86/imports/compile_typeinfo.sh new file mode 100644 index 0000000..bd65126 --- /dev/null +++ b/kernel/arch/x86/imports/compile_typeinfo.sh @@ -0,0 +1,7 @@ +cd .. +for item in kernel/runtime/std/typeinfo/*.d; +do + echo "--> $item" + ldc -nodefaultlib -g -I. -m32 -Ikernel/runtime/. -code-model=kernel -c $item -odbuild/dsss_objs/G/. ;\ +done +cd build diff --git a/kernel/arch/x86/linker.ld b/kernel/arch/x86/linker.ld new file mode 100644 index 0000000..53d1a64 --- /dev/null +++ b/kernel/arch/x86/linker.ld @@ -0,0 +1,134 @@ +/* + * linker.ld + * + * This script is given as the only script to the linker + * Will map boot.S to LMA, and then everything else + * will be linked to the VMA and mapped at the LMA + * _etext, _edata, _end are defined here + * + */ + +/* + * KERNEL LINK LOCATIONS + * + * these are the locations to map to + * they need to be set within boot.h + * as well + * + */ + +kernel_VMA = 0x100000; +kernel_LMA = 0x100000; + +/* start from the entry point */ +ENTRY(_start) +SECTIONS +{ + /* link from LMA */ + . = kernel_LMA; + + _kernelLMA = .; + + _boot = .; + + /* boot.S is ran in linear addresses */ + .text_boot : + { + dsss_objs/O/kernel.arch.x86.boot.boot.o (.text) + } + + _eboot = .; + + /* The following is for the trampoline code, if and when + * multiprocessor support will be necessary. + */ + + /* PROVIDE(_trampoline = .); + + .text_trampoline ALIGN(0x1000) : + { + dsss_objs/G/kernel.arch.x86_64.boot.trampoline.o (.text) + } + + PROVIDE(_etrampoline = .); */ + + /* link from VMA */ + . = . + kernel_VMA; + + _text = .; + + _kernel = .; + _kernelVMA = kernel_VMA; + + /* the rest of the code links to higher memory */ + .text : AT(ADDR(.text) - kernel_VMA + kernel_LMA) + { + code = .; + *(.text) + *(.text*) + + /* read only data */ + *(.rodata*) + *(.rdata*) + + . = ALIGN(4096); + } + + /*PROVIDE(_ekernel = .);*/ + + /* _etext defined */ + _etext = .; PROVIDE(etext = .); + + _data = .; + + /* data section */ + .data : AT(ADDR(.data) - kernel_VMA + kernel_LMA) + { + data = .; + *(.data) + + /* constructors and deconstructors + (if needed, doesn't hurt) */ + + start_ctors = .; + *(.ctor*) + end_ctors = .; + + start_dtors = .; + *(.dtor*) + end_dtors = .; + + . = ALIGN(4096); + } + + /* _edata defined */ + _edata = .; PROVIDE (edata = .); + + _bss = .; + + /* static code */ + .bss : AT(ADDR(.bss) - kernel_VMA + kernel_LMA) + { + *(.bss) + . = ALIGN(4096); + } + + _ebss = .; + + /* */ + .ehframe : AT(ADDR(.ehframe) - kernel_VMA + kernel_LMA) + { + ehframe = .; + *(.ehframe) + . = ALIGN(4096); + } + + + /* _end defined (for posterity and tradition) */ + _end = .; PROVIDE (end = .); + + _ekernel = .; + +} + + diff --git a/kernel/arch/x86/load.s b/kernel/arch/x86/load.s deleted file mode 100644 index 35b2d02..0000000 --- a/kernel/arch/x86/load.s +++ /dev/null @@ -1,101 +0,0 @@ -; load.s - -; entry is from boot.s - -bits 64 - -; Everywhere you see some weird addition logic -; This is to fit the addresses into 32 bit sizes -; Note, they will sign extend! - -section .text - -; include useful definitions -%include "defines.mac" - -; extern to kmain.d -extern kmain - -global start64 - -start64: - - ; Initialize the 64 bit stack pointer. - mov rsp, ((stack - KERNEL_VMA_BASE) + STACK_SIZE) - - ; Set up the stack for the return. - push CS_KERNEL - - ; RAX - the address to return to - mov rax, KERNEL_VMA_BASE >> 32 - shl rax, 32 - or rax, long_entry - (KERNEL_VMA_BASE & 0xffffffff00000000) - push rax - - ; Go into canonical higher half - ; It uses a trick to update the program counter - ; across a 64 bit address space - ret - -long_entry: - - ; From here on out, we are running instructions - ; within the higher half (0xffffffff80000000 ... ) - - ; We can safely upmap the lower half, we do not - ; need an identity mapping of this region - - ; set up a 64 bit virtual stack - mov rax, KERNEL_VMA_BASE >> 32 - shl rax, 32 - or rax, stack - (KERNEL_VMA_BASE & 0xffffffff00000000) - mov rsp, rax - - ; set cpu flags - push 0 - lss eax, [rsp] - popf - - ; set the input/output permission level to 3 - ; it will allow all access - - pushf - pop rax - or rax, 0x3000 - push rax - popf - - ; update the multiboot struct to point to a - ; virtual address - add rsi, (KERNEL_VMA_BASE & 0xffffffff) - - ; push the parameters (just in case) - push rsi - push rdi - - ; call kmain - call kmain - - - - ; we should not get here - -haltloop: - - hlt - jmp haltloop - nop - nop - nop - - - -; stack space -global stack -align 4096 - -stack: - %rep STACK_SIZE - dd 0 - %endrep - diff --git a/kernel/arch/x86_64/imports/compile_typeinfo.sh b/kernel/arch/x86_64/imports/compile_typeinfo.sh new file mode 100644 index 0000000..604d9f4 --- /dev/null +++ b/kernel/arch/x86_64/imports/compile_typeinfo.sh @@ -0,0 +1,7 @@ +cd .. +for item in kernel/runtime/std/typeinfo/*.d; +do + echo "--> $item" + ldc -nodefaultlib -g -I. -m64 -Ikernel/runtime/. -code-model=kernel -c $item -odbuild/dsss_objs/G/. ;\ +done +cd build diff --git a/kernel/arch/x86_64/linker.ld b/kernel/arch/x86_64/linker.ld index 59b087a..197c89f 100644 --- a/kernel/arch/x86_64/linker.ld +++ b/kernel/arch/x86_64/linker.ld @@ -34,7 +34,7 @@ SECTIONS /* boot.S is ran in linear addresses */ .text_boot : { - dsss_objs/G/kernel.arch.x86_64.boot.boot.o (.text) + dsss_objs/O/kernel.arch.x86_64.boot.boot.o (.text) } _eboot = .; diff --git a/kernel/runtime/precision.d b/kernel/runtime/precision.d new file mode 100644 index 0000000..d3692ae --- /dev/null +++ b/kernel/runtime/precision.d @@ -0,0 +1,600 @@ +/* + * precision.d + * + * This file implements a multiprecision divide function for non 64-bit + * systems. This original license for the file used as reference is below. + * The file originally was located: + * http://fxr.watson.org/fxr/source/libkern/qdivrem.c + * It has been updated for the D programming language and for usage + * within the XOmB kernel and XOmB Bare Bones packages. + * + * Author: Dave Wilkinson, The Regents of the University of California. + * + */ + +module kernel.runtime.precision; + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/libkern/qdivrem.c,v 1.8 1999/08/28 00:46:35 peter Exp $ + * $DragonFly: src/sys/libkern/qdivrem.c,v 1.4 2004/01/26 11:09:44 joerg Exp $ + */ + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ + +//------------- + +/*#include +#include +#include +#include */ + +/* + * Depending on the desired operation, we view a 64 bit integer (a long) + * in these particular ways. + */ +union uu { + long l; + ulong ul; + int si[2]; + uint ui[2]; +} + +// These are architecture specific, and should be defined in the +// architecture import as a definition sheet. + +const size_t LONG_HIGHWORD = 1; +const size_t LONG_LOWWORD = 0; + +const size_t BYTE_BITS = 8; + +/* + * Define high and low longwords. (endian-ness) + */ +alias LONG_HIGHWORD H; +alias LONG_LOWWORD L; + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +const uint QUAD_BITS = (8 * BYTE_BITS); +const uint LONG_BITS = (4 * BYTE_BITS); +const uint HALF_BITS = (4 * BYTE_BITS / 2); + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +uint HHALF(uint x) { + return x >> HALF_BITS; +} + +int LHALF(uint x) { + return x & ((1 << HALF_BITS) - 1); +} + +uint LHUP(uint x) { + return x << HALF_BITS; +} + +typedef uint qshift_t; + +/* +quad_t __ashldi3(quad_t, qshift_t); +quad_t __ashrdi3(quad_t, qshift_t); +int __cmpdi2(quad_t a, quad_t b); +quad_t __divdi3(quad_t a, quad_t b); +quad_t __lshrdi3(quad_t, qshift_t); +quad_t __moddi3(quad_t a, quad_t b); +u_quad_t __qdivrem(u_quad_t u, u_quad_t v, u_quad_t *rem); +u_quad_t __udivdi3(u_quad_t a, u_quad_t b); +u_quad_t __umoddi3(u_quad_t a, u_quad_t b); +int __ucmpdi2(u_quad_t a, u_quad_t b); +*/ + +// ------------------ + +// digit base +const uint B = (1 << HALF_BITS); + +/* Combine two `digits' to make a single two-digit number. */ +uint COMBINE(uint a, uint b) { + return (a << HALF_BITS) | b; +} + +alias uint digit; + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +void shl(digit* p, int len, int sh) +{ + int i; + + for (i = 0; i < len; i++) + { + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + } + + p[i] = LHALF(p[i] << sh); +} + +/* + * qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_long. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +ulong qdivrem(ulong uq, ulong vq, ulong* arq) +{ + uu tmp; + + digit* u; + digit* v; + digit* q; + + digit v1, v2; + + uint qhat, rhat, t; + + int m, n, d, j, i; + digit[5] uspace; + digit[5] vspace; + digit[5] qspace; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + volatile uint zero; + + tmp.ui[H] = tmp.ui[L] = 1 / zero; + if (arq) + { + *arq = uq; + } + return (tmp.l); + } + if (uq < vq) { + if (arq) { + *arq = uq; + } + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.ul = uq; + u[0] = 0; + u[1] = HHALF(tmp.ui[H]); + u[2] = LHALF(tmp.ui[H]); + u[3] = HHALF(tmp.ui[L]); + u[4] = LHALF(tmp.ui[L]); + tmp.ul = vq; + v[1] = HHALF(tmp.ui[H]); + v[2] = LHALF(tmp.ui[H]); + v[3] = HHALF(tmp.ui[L]); + v[4] = LHALF(tmp.ui[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + uint rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ui[H] = COMBINE(q1, q2); + tmp.ui[L] = COMBINE(q3, q4); + return (tmp.l); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) { + m--; + } + + for (i = 4 - m; --i >= 0;) { + q[i] = 0; + } + + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + + d = 0; + + for (t = v[1]; t < B / 2; t <<= 1) { + d++; + } + + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + + /* + * D2: j = 0. + */ + + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + uint nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + + while (v2 * qhat > COMBINE(rhat, uj2)) { + +qhat_too_big: + + qhat--; + if ((rhat += v1) >= B) { + break; + } + } + + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + { + u[i] = (u[i] >> d) | LHALF(u[i - 1] << (HALF_BITS - d)); + } + u[i] = 0; + } + + tmp.ui[H] = COMBINE(uspace[1], uspace[2]); + tmp.ui[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.l; + } + + tmp.ui[H] = COMBINE(qspace[1], qspace[2]); + tmp.ui[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.l); +} + +// Return 0, 1, or 2 as a <, =, > b respectively. +// Neither a nor b are considered signed. +int ucmpdi2(ulong a, ulong b) +{ + uu aa, bb; + + aa.ul = a; + bb.ul = b; + return (aa.ui[H] < bb.ui[H] ? 0 : aa.ui[H] > bb.ui[H] ? 2 : + aa.ui[L] < bb.ui[L] ? 0 : aa.ui[L] > bb.ui[L] ? 2 : 1); +} + +extern(C) int __ucmpdi2(ulong a, ulong b) +{ + return ucmpdi2(a,b); +} + +// Divide two unsigned longs +ulong udivdi3(ulong a, ulong b) +{ + return qdivrem(a, b, null); +} + +extern(C) ulong __udivdi3(ulong a, ulong b) +{ + return udivdi3(a,b); +} + +// Modulus two unsigned longs +ulong umoddi3(ulong a, ulong b) +{ + ulong r; + qdivrem(a, b, &r); + return r; +} + +extern(C) ulong __umoddi3(ulong a, ulong b) +{ + return umoddi3(a,b); +} + +// Logical shift right of an unsigned long +long lshrdi3(long a, qshift_t shift) +{ + uu aa; + + aa.l = a; + if (shift >= LONG_BITS) { + aa.ui[L] = shift >= QUAD_BITS ? 0 : + aa.ui[H] >> (shift - LONG_BITS); + aa.ui[H] = 0; + } else if (shift > 0) { + aa.ui[L] = (aa.ui[L] >> shift) | + (aa.ui[H] << (LONG_BITS - shift)); + aa.ui[H] >>= shift; + } + + return aa.l; +} + +extern(C) long __lshrdi3(long a, qshift_t shift) +{ + return lshrdi3(a, shift); +} + +// Arithmetic Shift Left of a signed long +// A.K.A. Logical Shift Left +long ashldi3(long a, qshift_t shift) +{ + uu aa; + + aa.l = a; + if (shift >= LONG_BITS) { + aa.ui[H] = shift >= QUAD_BITS ? 0 : + aa.ui[L] << (shift - LONG_BITS); + aa.ui[L] = 0; + } else if (shift > 0) { + aa.ui[H] = (aa.ui[H] << shift) | + (aa.ui[L] >> (LONG_BITS - shift)); + aa.ui[L] <<= shift; + } + + return aa.l; +} + +extern(C) long __ashldi3(long a, qshift_t shift) +{ + return ashldi3(a, shift); +} + +// Arithmetic Shift Right of a signed long +long ashrdi3(long a, qshift_t shift) +{ + uu aa; + + aa.l = a; + if (shift >= LONG_BITS) { + int s; + + /* Smear bits rightward using the machine's right-shift method, + whether that is sign extension or zero fill, to get the + 'sign word' s. Note that shifting by LONG_BITS is + undefined, so we shift (LONG_BITS-1), then 1 more, to get + our answer */ + + s = (aa.si[H] >> (LONG_BITS - 1)) >> 1; + aa.ui[L] = shift >= QUAD_BITS ? s : + aa.si[H] >> (shift - LONG_BITS); + aa.ui[H] = s; + } else if (shift > 0) { + aa.ui[L] = (aa.ui[L] >> shift) | + (aa.ui[H] << (LONG_BITS - shift)); + aa.si[H] >>= shift; + } + + return aa.l; +} + +extern(C) long __ashrdi3(long a, qshift_t shift) +{ + return ashrdi3(a,shift); +} + +// Return 0, 1, or 2 as a <, =, > b respectively. +// Both a and b are considered signed -- which means only +// the high word is signed. +int cmpdi2(long a, long b) +{ + uu aa, bb; + + aa.l = a; + bb.l = b; + + return (aa.si[H] < bb.si[H] ? 0 : aa.si[H] > bb.si[H] ? 2 : + aa.ui[L] < bb.ui[L] ? 0 : aa.ui[L] > bb.ui[L] ? 2 : 1); +} + +extern(C) int __cmpdi2(long a, long b) +{ + return cmpdi2(a,b); +} + +// Divide two signed longs +long divdi3(long a, long b) +{ + ulong ua, ub, ul; + int neg; + + if (a < 0) { + ua = -cast(ulong)a; + neg = 1; + } else { + ua = a; + neg = 0; + } + + if (b < 0) { + ub = -cast(ulong)b; + neg ^= 1; + } else { + ub = b; + } + + ul = qdivrem(ua, ub, null); + return (neg ? -ul : ul); +} + +extern(C) long __divdi3(long a, long b) +{ + return divdi3(a,b); +} + +// Modulus two signed longs +long moddi3(long a, long b) +{ + ulong ua, ub, ur; + int neg; + + if (a < 0) { + ua = -cast(ulong)a; + neg = 1; + } else { + ua = a; + neg = 0; + } + + if (b < 0) { + ub = -cast(ulong)b; + } else { + ub = b; + } + + qdivrem(ua, ub, &ur); + return (neg ? -ur : ur); +} + +extern(C) long __moddi3(long a, long b) +{ + return moddi3(a,b); +}