I don't think I broke anything. 32-bit bare-bones has been added.

Signed-off-by: The XOmB Overlord <overlord@xomb.net>
2026-01-11 18:33:15 +01:00 · 2009-05-24 12:46:04 +08:00
parent 5f08273316
commit 432ee732ac
12 changed files with 849 additions and 309 deletions
--- a/build/compile_typeinfo.sh
+++ b/build/compile_typeinfo.sh
@@ -1,7 +0,0 @@
 cd ..
 for item in kernel/runtime/std/typeinfo/*.d;
 do
 	echo "--> $item"
 	ldc -nodefaultlib -g -I. -Ikernel/runtime/. -code-model=kernel -c $item -odbuild/dsss_objs/G/. ;\
 done
 cd build
--- a/build/confs/x86.conf
+++ b/build/confs/x86.conf
@@ -8,7 +8,7 @@ buildflags=-dc=ldc-xomb
 [../kernel/core/kmain.d]
-buildflags=-dc=ldc-xomb -I..
+buildflags=-dc=ldc-xomb-32 -I..
 # compile the assembly for the target
@@ -19,29 +19,32 @@ echo ; \
 echo Setting up Architecture Dependence: x86_64; \
 echo '--> architecture.d';\
 mkdir -p dsss_imports;\
 mkdir -p dsss_objs;\
 mkdir -p dsss_objs/G;\
 mkdir -p dsss_objs/O;\
 cp ../kernel/arch/x86/imports/compile_typeinfo.sh dsss_imports/.;\
 cp ../kernel/arch/x86/imports/architecture.d dsss_imports/.;\
 echo ; \
-echo Compiling Assembly for target: x86 ;\
+echo Compiling Assembly for target: x86_64 ;\
 echo '--> boot.S';\
-yasm -o dsss_objs/G/kernel.arch.x86.boot.boot.o ../kernel/arch/x86/boot/boot.s -felf;\
+yasm -felf32 -mx86 -o dsss_objs/O/kernel.arch.x86.boot.boot.o ../kernel/arch/x86/boot/boot.s;\
 echo '--> load.S';\
 yasm -o dsss_objs/G/kernel.arch.x86.load.load.o ../kernel/arch/x86/boot/load.s -felf;\
 \
 \
 echo ; \
 echo Compiling Kernel Runtime ; \
 echo '--> kernel/runtime/object.d';\
-ldc -nodefaultlib -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/object.d -ofdsss_objs/G/kernel.runtime.object.o; \
+ldc -nodefaultlib -m32 -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/object.d -ofdsss_objs/G/kernel.runtime.object.o; \
 echo '--> kernel/runtime/invariant.d';\
-ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/invariant.d -ofdsss_objs/G/kernel.runtime.invariant.o; \
+ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/invariant.d -ofdsss_objs/G/kernel.runtime.invariant.o; \
 echo '--> kernel/runtime/std/typeinfo/*';\
-ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c `ls ../kernel/runtime/std/typeinfo/*.d` -oddsss_objs/G/. ;\
+sh ./dsss_imports/compile_typeinfo.sh;\
 echo '--> kernel/runtime/dstubs.d';\
-ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/dstubs.d -ofdsss_objs/G/kernel.runtime.dstubs.o ;\
+ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/dstubs.d -ofdsss_objs/G/kernel.runtime.dstubs.o ;\
 echo '--> kernel/runtime/util.d';\
-ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/util.d -ofdsss_objs/G/kernel.runtime.util.o ;\
+ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/util.d -ofdsss_objs/G/kernel.runtime.util.o ;\
 echo '--> kernel/runtime/std/moduleinit.d';\
-ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/std/moduleinit.d -ofdsss_objs/G/kernel.runtime.std.moduleinit.o ;\
+ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/std/moduleinit.d -ofdsss_objs/G/kernel.runtime.std.moduleinit.o ;\
 echo '--> kernel/runtime/precision.d';\
 ldc -nodefaultlib -m32 -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/precision.d -ofdsss_objs/G/kernel.runtime.precision.o ;\
 \
 echo ; \
 echo Compiling Kernel Proper ;
@@ -59,12 +62,12 @@ echo Creating Kernel Executable; \
 echo '--> xomb';\
 #llvm-ld -native -Xlinker=-nostdlib -Xlinker=-Tlinker.ld -Xlinker="-b elf64-x86-64" `ls dsss_objs/G/*.o` -o iso/boot/xomb ; \
 #llvm-ld -nodefaultlib -g -I.. -I../kernel/runtime/. `ls dsss_objs/G/*.o` ;\
-ld -nostdlib -nodefaultlibs -b elf64-x86-64 -T ../kernel/arch/x86_64/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o`;\
+ld -nostdlib -nodefaultlibs -melf_i386 -belf32-i386 -T ../kernel/arch/x86/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o` `ls dsss_objs/O/*.o`;\
 \
 echo ;\
 echo Creating Kernel Dump; \
 echo '--> xomb.dump';\
-rm -f xomb.dump && x86_64-pc-elf-objdump -d -S -r iso/boot/xomb > xomb.dump;\
+rm -f xomb.dump && objdump -d -S -r iso/boot/xomb > xomb.dump;\
 \
 echo ;\
 echo Compiling ISO; \
--- a/build/confs/x86_64.conf
+++ b/build/confs/x86_64.conf
@@ -19,13 +19,17 @@ echo ; \
 echo Setting up Architecture Dependence: x86_64; \
 echo '--> architecture.d';\
 mkdir -p dsss_imports;\
 mkdir -p dsss_objs;\
 mkdir -p dsss_objs/G;\
 mkdir -p dsss_objs/O;\
 cp ../kernel/arch/x86_64/imports/compile_typeinfo.sh dsss_imports/.;\
 cp ../kernel/arch/x86_64/imports/architecture.d dsss_imports/.;\
 echo ; \
 echo Compiling Assembly for target: x86_64 ;\
-echo '--> boot.S';\
+echo '--> boot.s';\
-yasm -o dsss_objs/G/kernel.arch.x86_64.boot.boot.o ../kernel/arch/x86_64/boot/boot.s -felf64;\
+yasm -o dsss_objs/O/kernel.arch.x86_64.boot.boot.o ../kernel/arch/x86_64/boot/boot.s -felf64;\
-echo '--> load.S';\
+echo '--> load.s';\
-yasm -o dsss_objs/G/kernel.arch.x86_64.load.load.o ../kernel/arch/x86_64/boot/load.s -felf64;\
+yasm -o dsss_objs/O/kernel.arch.x86_64.load.load.o ../kernel/arch/x86_64/boot/load.s -felf64;\
 \
 \
 echo ; \
@@ -35,7 +39,7 @@ ldc -nodefaultlib -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/run
 echo '--> kernel/runtime/invariant.d';\
 ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/invariant.d -ofdsss_objs/G/kernel.runtime.invariant.o; \
 echo '--> kernel/runtime/std/typeinfo/*';\
-sh ./compile_typeinfo.sh;\
+sh ./dsss_imports/compile_typeinfo.sh;\
 echo '--> kernel/runtime/dstubs.d';\
 ldc -nodefaultlib -g -I.. -I../kernel/runtime/. -code-model=kernel -c ../kernel/runtime/dstubs.d -ofdsss_objs/G/kernel.runtime.dstubs.o ;\
 echo '--> kernel/runtime/util.d';\
@@ -59,7 +63,7 @@ echo Creating Kernel Executable; \
 echo '--> xomb';\
 #llvm-ld -native -Xlinker=-nostdlib -Xlinker=-Tlinker.ld -Xlinker="-b elf64-x86-64" `ls dsss_objs/G/*.o` -o iso/boot/xomb ; \
 #llvm-ld -nodefaultlib -g -I.. -I../kernel/runtime/. `ls dsss_objs/G/*.o` ;\
-ld -nostdlib -nodefaultlibs -b elf64-x86-64 -T ../kernel/arch/x86_64/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o`;\
+ld -nostdlib -nodefaultlibs -b elf64-x86-64 -T ../kernel/arch/x86_64/linker.ld -o iso/boot/xomb `ls dsss_objs/G/*.o` `ls dsss_objs/O/*.o`;\
 \
 echo ;\
 echo Creating Kernel Dump; \
--- a/kernel/arch/x86/boot.s
+++ b/kernel/arch/x86/boot.s
@@ -1,163 +0,0 @@
 ; boot.s
 ; entry is from bootloader
 section .text
 bits 32
 %include "defines.mac"
 ; externs given by the linker script
 extern _edata
 extern _end
 ; extern to the load.s
 extern start64
 extern stack
 ; define the starting point for this module
 global start
 global _start
 start:
 _start:
 	; Stash values for multiboot we won't touch until 64 bit mode
 	mov esi, ebx
 	mov edi, eax
 	jmp start32
 	; the multiboot header needs to be aligned at
 	; a 32 bit boundary
 	align 4
 	multiboot_header:
 	dd MULTIBOOT_HEADER_MAGIC
 	dd MULTIBOOT_HEADER_FLAGS
 	dd -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
 	dd multiboot_header
 	dd _start
 	dd (_edata-KERNEL_VMA_BASE)
 	dd (_end-KERNEL_VMA_BASE)
 	dd _start
 ; the 32 bit entry
 global start32
 start32:
 	; disable interrupts
 	cli
 	; enable 64-bit page translation table entries
 	; by setting CR4.PAE = 1.
 	;
 	; Paging is not enabled until long mode.
 	mov eax, cr4
 	bts eax, 5
 	mov cr4, eax
 	; Create long mode page table and init CR3 to
 	; point to the base of the PML4 page table
 	mov eax, pml4_base
 	mov cr3, eax
 	; Enable Long mode and SYSCALL / SYSRET instructions
 	mov ecx, 0xC0000080
 	rdmsr
 	bts eax, 8
 	bts eax, 0
 	wrmsr
 	; Load the 32 bit GDT
 	lgdt [pGDT32]
 	; Load the 32 bit IDT
 	; lidt [pIDT32]
 	; establish a stack for 32 bit code
 	mov esp, (stack-KERNEL_VMA_BASE) + STACK_SIZE
 	; enable paging to activate long mode
 	mov eax, cr0
 	bts eax, 31
 	mov cr0, eax
 	jmp CS_KERNEL:(start64-KERNEL_VMA_BASE)
 bits 64
 code64Jump:
 	jmp (start64-KERNEL_VMA_BASE)
 ; Data Structures Follow
 bits 32
 ; 32 bit gdt
 align 4096
 pGDT32:
 	dw GDT_END - GDT_TABLE - 1
 	dq GDT_TABLE - KERNEL_VMA_BASE
 GDT_TABLE:
 	dq 0x0000000000000000	; Null Descriptor
 	dq 0x00cf9a000000ffff	; CS_KERNEL32
 	dq 0x00af9a000000ffff,0	; CS_KERNEL
 	dq 0x00af93000000ffff,0	; DS_KERNEL
 	dq 0x00affa000000ffff,0	; CS_USER
 	dq 0x00aff3000000ffff,0	; DS_USER
 	dq 0,0					;
 	dq 0,0					;
 	dq 0,0					;
 	dq 0,0					;
 	dq 0,0,0				; Three TLS descriptors
 	dq 0x0000f40000000000	;
 GDT_END:
 ; Temporary page tables
 ; These assume linking to 0xFFFF800000000000
 align 4096
 pml4_base:
 	dq (pml3_base + 0x7)
 	times 255 dq 0
 	dq (pml3_base + 0x7)
 	times 255 dq 0
 align 4096
 pml3_base:
 	dq (pml2_base + 0x7)
 	times 511 dq 0
 align 4096
 pml2_base:
 	%assign i 0
 	%rep 25
 	dq (pml1_base + i + 0x7)
 	%assign i i+4096
 	%endrep
 	times (512-25) dq 0
 align 4096
 ; 25 page tables are described here
 ; this maps 50 MB from address 0x0
 ; to an identity mapping
 pml1_base:
 	%assign i 0
 	%rep 512*25
 	dq (i << 12) | 0x087
 	%assign i i+1
 	%endrep
--- a/kernel/arch/x86/boot/boot.s
+++ b/kernel/arch/x86/boot/boot.s
@@ -0,0 +1,73 @@
 ; boot.s
 ;
 ; 32-bit bare-bones boot stub. The bootloader enters at _start; the stub
 ; saves the two bootloader registers, jumps over the embedded multiboot
 ; header, disables interrupts and then spins forever.
 section .text
 bits 32
 ; multiboot definitions
 ; Per the Multiboot specification, flags 0x00010003 sets bit 0 (page-align
 ; modules), bit 1 (provide memory info) and bit 16 (the address fields in
 ; the header below are valid).
 %define MULTIBOOT_HEADER_MAGIC	0x1BADB002
 %define MULTIBOOT_HEADER_FLAGS	0x00010003
 ; where is the kernel?
 ; (both bases are 1 MiB here; neither is referenced in this file)
 %define KERNEL_VMA_BASE			0x00100000
 %define KERNEL_LMA_BASE			0x00100000
 ; the gdt entry to use for the kernel
 ; (not referenced in this file)
 %define CS_KERNEL				0x10
 %define CS_KERNEL32				0x08
 ; externs given by the linker script
 extern _edata
 extern _end
 ; externs declared for load.s; neither symbol is referenced in this file
 ; NOTE(review): looks like a leftover from the 64-bit boot path -- confirm
 extern start64
 extern stack
 ; other definitions
 ; (STACK_SIZE is not referenced in this file)
 %define STACK_SIZE				0x4000
 ; define the starting point for this module
 global start
 global _start
 start:
 _start:
 	; Stash the bootloader-provided EBX and EAX in ESI and EDI
 	; (per Multiboot: EBX = info structure pointer, EAX = magic value),
 	; then jump over the header data that follows.
 	mov esi, ebx
 	mov edi, eax
 	jmp start32
 	; the multiboot header needs to be aligned at
 	; a 32 bit boundary
 	align 4
 	multiboot_header:
 	; magic, flags, checksum (magic + flags + checksum == 0 mod 2^32)
 	dd MULTIBOOT_HEADER_MAGIC
 	dd MULTIBOOT_HEADER_FLAGS
 	dd -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
 	; address fields, in Multiboot order: header_addr, load_addr,
 	; load_end_addr, bss_end_addr, entry_addr
 	dd multiboot_header
 	dd _start
 	dd _edata
 	dd _end
 	dd _start
 ; the 32 bit entry
 global start32
 start32:
 	; disable interrupts
 	cli
 	; bare-bones stub: nothing else is set up, so spin here forever
 _loop:
 	jmp _loop
 	; padding after the loop; never executed
 	nop
 	nop
 	nop
 	nop
 	nop
 	nop
--- a/kernel/arch/x86/defines.mac
+++ b/kernel/arch/x86/defines.mac
@@ -1,17 +0,0 @@
 ; multiboot definitions
 %define MULTIBOOT_HEADER_MAGIC	0x1BADB002
 %define MULTIBOOT_HEADER_FLAGS	0x00010003
 ; where is the kernel?
 %define KERNEL_VMA_BASE			0xFFFF800000000000
 %define KERNEL_LMA_BASE			0x100000
 ; the gdt entry to use for the kernel
 %define CS_KERNEL				0x10
 %define CS_KERNEL32				0x08
 ; other definitions
 %define STACK_SIZE				0x4000
--- a/kernel/arch/x86/imports/compile_typeinfo.sh
+++ b/kernel/arch/x86/imports/compile_typeinfo.sh
@@ -0,0 +1,7 @@
 # Compile every TypeInfo module of the kernel runtime as a 32-bit object.
 # The script is run from the build directory; the source glob and include
 # paths are relative to the directory above it, so step up first and bail
 # out if that fails rather than compiling from the wrong place.
 cd .. || exit 1
 for item in kernel/runtime/std/typeinfo/*.d
 do
 	# When nothing matches, the shell leaves the pattern literal; skip it.
 	[ -e "$item" ] || continue
 	echo "--> $item"
 	ldc -nodefaultlib -g -I. -m32 -Ikernel/runtime/. -code-model=kernel -c "$item" -odbuild/dsss_objs/G/.
 done
 cd build
--- a/kernel/arch/x86/linker.ld
+++ b/kernel/arch/x86/linker.ld
@@ -0,0 +1,134 @@
 /*
 * linker.ld
 *
 *  This is the only script given to the linker.
 *  The .text of boot.o is placed first, starting at kernel_LMA; every
 *  other section is linked after it, with its load address computed as
 *  (address - kernel_VMA + kernel_LMA).
 *  _etext, _edata and _end are defined here.
 *
 */
 /*
 *  KERNEL LINK LOCATIONS
 *
 *  These are the locations to map to.
 *  NOTE(review): the original comment said they must also be set in
 *  "boot.h"; boot.s defines KERNEL_VMA_BASE / KERNEL_LMA_BASE with the
 *  same value -- presumably that is the file meant. Confirm.
 *
 */
 kernel_VMA = 0x100000;
 kernel_LMA = 0x100000;
 /* start from the entry point */
 ENTRY(_start)
 SECTIONS
 {
    /* link from LMA */
    . = kernel_LMA;
 	_kernelLMA = .;
 	_boot = .;
    /* the boot object's .text comes first and is linked at kernel_LMA */
    .text_boot :
    {
        dsss_objs/O/kernel.arch.x86.boot.boot.o (.text)
    }
 	_eboot = .;
 	/* The following is for the trampoline code, if and when
 	 *   multiprocessor support will be necessary.
 	 */
 	/*	PROVIDE(_trampoline = .);
 	.text_trampoline ALIGN(0x1000) :
 	{
 		dsss_objs/G/kernel.arch.x86_64.boot.trampoline.o (.text)
 	}
 	PROVIDE(_etrampoline = .); */
    /* link from VMA: advance the location counter by kernel_VMA */
    . = . + kernel_VMA;
 	_text = .;
 	_kernel = .;
 	_kernelVMA = kernel_VMA;
    /* the rest of the code is linked past the boot section; since
       kernel_VMA == kernel_LMA here, each AT() below evaluates to the
       section's own address */
    .text : AT(ADDR(.text) - kernel_VMA + kernel_LMA)
    {
        code = .;
        *(.text)
 		*(.text*)
        /* read only data */
        *(.rodata*)
        *(.rdata*)
        . = ALIGN(4096);
    }
 	/*PROVIDE(_ekernel = .);*/
    /* _etext defined */
    _etext = .; PROVIDE(etext = .);
 	_data = .;
    /* data section */
    .data : AT(ADDR(.data) - kernel_VMA + kernel_LMA)
    {
        data = .;
        *(.data)
        /* constructors and destructors
           (if needed, doesn't hurt) */
        start_ctors = .;
        *(.ctor*)
        end_ctors = .;
        start_dtors = .;
        *(.dtor*)
        end_dtors = .;
        . = ALIGN(4096);
    }
    /* _edata defined */
    _edata = .; PROVIDE (edata = .);
 	_bss = .;
    /* uninitialized data (.bss) */
    .bss : AT(ADDR(.bss) - kernel_VMA + kernel_LMA)
    {
        *(.bss)
        . = ALIGN(4096);
    }
 	_ebss = .;
    /* NOTE(review): this collects input sections named ".ehframe";
       toolchains normally emit ".eh_frame" -- confirm the intended name */
    .ehframe : AT(ADDR(.ehframe) - kernel_VMA + kernel_LMA)
    {
        ehframe = .;
        *(.ehframe)
        . = ALIGN(4096);
    }
    /* _end defined (for posterity and tradition) */
    _end = .; PROVIDE (end = .);
 	_ekernel = .;
 }
--- a/kernel/arch/x86/load.s
+++ b/kernel/arch/x86/load.s
@@ -1,101 +0,0 @@
 ; load.s
 ; entry is from boot.s
 bits 64
 ; Everywhere you see some weird addition logic
 ; This is to fit the addresses into 32 bit sizes
 ; Note, they will sign extend!
 section .text
 ; include useful definitions
 %include "defines.mac"
 ; extern to kmain.d
 extern kmain
 global start64
 start64:
 	; Initialize the 64 bit stack pointer.
 	mov rsp, ((stack - KERNEL_VMA_BASE) + STACK_SIZE)
 	; Set up the stack for the return.
 	push CS_KERNEL
 	; RAX - the address to return to
 	mov rax, KERNEL_VMA_BASE >> 32
 	shl rax, 32
 	or rax, long_entry - (KERNEL_VMA_BASE & 0xffffffff00000000)
 	push rax
 	; Go into canonical higher half
 	; It uses a trick to update the program counter
 	;   across a 64 bit address space
 	ret
 long_entry:
 	; From here on out, we are running instructions
 	; within the higher half (KERNEL_VMA_BASE and above)
 	; We can safely unmap the lower half, we do not
 	; need an identity mapping of this region
 	; set up a 64 bit virtual stack
 	mov rax, KERNEL_VMA_BASE >> 32
 	shl rax, 32
 	or rax, stack - (KERNEL_VMA_BASE & 0xffffffff00000000)
 	mov rsp, rax
 	; set cpu flags
 	push 0
 	lss eax, [rsp]
 	popf
 	; set the input/output permission level to 3
 	; it will allow all access
 	pushf
 	pop rax
 	or rax, 0x3000
 	push rax
 	popf
 	; update the multiboot struct to point to a
 	; virtual address
 	add rsi, (KERNEL_VMA_BASE & 0xffffffff)
 	; push the parameters (just in case)
 	push rsi
 	push rdi
 	; call kmain
 	call kmain
 	; we should not get here
 haltloop:
 	hlt
 	jmp haltloop
 	nop
 	nop
 	nop
 ; stack space
 global stack
 align 4096
 stack:
 	%rep STACK_SIZE
 	dd 0
 	%endrep
--- a/kernel/arch/x86_64/imports/compile_typeinfo.sh
+++ b/kernel/arch/x86_64/imports/compile_typeinfo.sh
@@ -0,0 +1,7 @@
 # Compile every TypeInfo module of the kernel runtime as a 64-bit object.
 # The script is run from the build directory; the source glob and include
 # paths are relative to the directory above it, so step up first and bail
 # out if that fails rather than compiling from the wrong place.
 cd .. || exit 1
 for item in kernel/runtime/std/typeinfo/*.d
 do
 	# When nothing matches, the shell leaves the pattern literal; skip it.
 	[ -e "$item" ] || continue
 	echo "--> $item"
 	ldc -nodefaultlib -g -I. -m64 -Ikernel/runtime/. -code-model=kernel -c "$item" -odbuild/dsss_objs/G/.
 done
 cd build
--- a/kernel/arch/x86_64/linker.ld
+++ b/kernel/arch/x86_64/linker.ld
@@ -34,7 +34,7 @@ SECTIONS
    /* boot.S is ran in linear addresses */
    .text_boot :
    {
-        dsss_objs/G/kernel.arch.x86_64.boot.boot.o (.text)
+        dsss_objs/O/kernel.arch.x86_64.boot.boot.o (.text)
    }
 	_eboot = .;
--- a/kernel/runtime/precision.d
+++ b/kernel/runtime/precision.d
@@ -0,0 +1,600 @@
 /*
 * precision.d
 *
 * This file implements a multiprecision divide function for non 64-bit
 * systems. This original license for the file used as reference is below.
 * The file originally was located:
 *		http://fxr.watson.org/fxr/source/libkern/qdivrem.c
 * It has been updated for the D programming language and for usage
 * within the XOmB kernel and XOmB Bare Bones packages.
 *
 * Author: Dave Wilkinson, The Regents of the University of California.
 *
 */
 module kernel.runtime.precision;
 /*-
 * Copyright (c) 1992, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/libkern/qdivrem.c,v 1.8 1999/08/28 00:46:35 peter Exp $
 * $DragonFly: src/sys/libkern/qdivrem.c,v 1.4 2004/01/26 11:09:44 joerg Exp $
 */
 /*
 * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
 * section 4.3.1, pp. 257--259.
 */
 //-------------
 /*#include <sys/cdefs.h>
 #include <sys/types.h>
 #include <sys/limits.h>
 #include <sys/syslimits.h>*/
 /*
 * Overlay used to look at one 64-bit integer either as a whole (signed or
 * unsigned) or as its two 32-bit words (signed or unsigned).
 */
 union uu {
 	long	l;		// whole value, signed
 	ulong	ul;		// whole value, unsigned
 	int[2]	si;		// the two words, signed
 	uint[2]	ui;		// the two words, unsigned
 }
 // Architecture-specific layout constants. They belong in the architecture
 // import as a definition sheet; for now they live here.
 const size_t BYTE_BITS = 8;
 const size_t LONG_LOWWORD = 0;
 const size_t LONG_HIGHWORD = 1;
 /*
 * Short names for the word indices of the low and high 32-bit words
 * inside a uu (this is where endian-ness is encoded).
 */
 alias LONG_LOWWORD L;
 alias LONG_HIGHWORD H;
 /*
 * Bit widths of a 64-bit quad, of one 32-bit word, and of half a word.
 * Used for shifting and for halfword extraction and assembly below.
 */
 const uint HALF_BITS	= (2 * BYTE_BITS);
 const uint LONG_BITS	= (4 * BYTE_BITS);
 const uint QUAD_BITS	= (8 * BYTE_BITS);
 /*
 * Extract high and low shortwords from longword, and move low shortword of
 * longword to upper half of long, i.e., produce the upper longword of
 * ((quad_t)(x) << (number_of_bits_in_long/2)).  (`x' must actually be u_long.)
 *
 * These are used in the multiply code, to split a longword into upper
 * and lower halves, and to reassemble a product as a quad_t, shifted left
 * (sizeof(long)*CHAR_BIT/2).
 */
 // Upper half of a word: x shifted right by HALF_BITS.
 uint HHALF(uint x) {
 	uint upper = x >> HALF_BITS;
 	return upper;
 }
 // Lower half of a word: x masked down to its low HALF_BITS bits.
 int LHALF(uint x) {
 	const uint lowMask = (1 << HALF_BITS) - 1;
 	return x & lowMask;
 }
 // Move the lower half of x into the upper half (x shifted left by HALF_BITS).
 uint LHUP(uint x) {
 	uint raised = x << HALF_BITS;
 	return raised;
 }
 // Shift-count type taken by the 64-bit shift helpers in this module
 // (lshrdi3, ashldi3, ashrdi3 and their extern(C) aliases).
 typedef uint qshift_t;
 /*
 quad_t          __ashldi3(quad_t, qshift_t);
 quad_t          __ashrdi3(quad_t, qshift_t);
 int             __cmpdi2(quad_t a, quad_t b);
 quad_t          __divdi3(quad_t a, quad_t b);
 quad_t          __lshrdi3(quad_t, qshift_t);
 quad_t          __moddi3(quad_t a, quad_t b);
 u_quad_t        __qdivrem(u_quad_t u, u_quad_t v, u_quad_t *rem);
 u_quad_t        __udivdi3(u_quad_t a, u_quad_t b);
 u_quad_t        __umoddi3(u_quad_t a, u_quad_t b);
 int             __ucmpdi2(u_quad_t a, u_quad_t b);
 */
 // ------------------
 // Digit base used by qdivrem: 2 to the power HALF_BITS, so one digit
 // occupies half of a 32-bit word.
 const uint B = (1 << HALF_BITS);
 /* Build one two-digit number: a becomes the high digit, b the low digit. */
 uint COMBINE(uint a, uint b) {
 	uint highDigit = a << HALF_BITS;
 	return highDigit | b;
 }
 // Storage type for one base-B digit in shl and qdivrem.
 alias uint digit;
 /*
 * Shift the digit string p[0]..p[len] left by `sh' bits in place. Each
 * digit keeps its own low bits shifted up and takes the bits that spill
 * out of the next digit; bits leaving p[0] are dropped (there never are
 * any). len >= 0 is assumed.  NOTE THAT THIS WRITES len+1 DIGITS.
 */
 void shl(digit* p, int len, int sh)
 {
 	// how far the next digit must move right to line up its spilled bits
 	uint spill = HALF_BITS - sh;
 	int idx = 0;
 	while (idx < len)
 	{
 		digit kept = LHALF(p[idx] << sh);
 		digit incoming = p[idx + 1] >> spill;
 		p[idx] = kept | incoming;
 		idx++;
 	}
 	// the last digit has no successor to borrow bits from
 	p[idx] = LHALF(p[idx] << sh);
 }
 /*
 * qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
 *
 * We do this in base 2-sup-HALF_BITS, so that all intermediate products
 * fit within u_long.  As a consequence, the maximum length dividend and
 * divisor are 4 `digits' in this base (they are shorter if they have
 * leading zeros).
 *
 * Params:
 *   uq  - dividend
 *   vq  - divisor; when it is 0 the function evaluates 1 / zero on
 *         purpose before returning
 *   arq - may be null; when non-null it receives the remainder
 *
 * Returns: the quotient; 0 when uq < vq.
 */
 ulong qdivrem(ulong uq, ulong vq, ulong* arq)
 {
 	uu tmp;
 	digit* u;
 	digit* v;
 	digit* q;
 	digit v1, v2;
 	uint qhat, rhat, t;
 	int m, n, d, j, i;
 	/* backing store for the digit strings; u, v and q point into these */
 	digit[5] uspace;
 	digit[5] vspace;
 	digit[5] qspace;
 	/*
 	 * Take care of special cases: divide by zero, and u < v.
 	 */
 	if (vq == 0) {
 		/* divide by zero. */
 		/* `zero' is never assigned; it relies on the default initial value */
 		volatile uint zero;
 		tmp.ui[H] = tmp.ui[L] = 1 / zero;
 		if (arq)
 		{
 			*arq = uq;
 		}
 		return (tmp.l);
 	}
 	if (uq < vq) {
 		/* quotient is 0 and the whole dividend is the remainder */
 		if (arq) {
 			*arq = uq;
 		}
 		return (0);
 	}
 	u = &uspace[0];
 	v = &vspace[0];
 	q = &qspace[0];
 	/*
 	 * Break dividend and divisor into digits in base B, then
 	 * count leading zeros to determine m and n.  When done, we
 	 * will have:
 	 *      u = (u[1]u[2]...u[m+n]) sub B
 	 *      v = (v[1]v[2]...v[n]) sub B
 	 *      v[1] != 0
 	 *      1 < n <= 4 (if n = 1, we use a different division algorithm)
 	 *      m >= 0 (otherwise u < v, which we already checked)
 	 *      m + n = 4
 	 * and thus
 	 *      m = 4 - n <= 2
 	 */
 	tmp.ul = uq;
 	/* leading zero digit; the normalizing shift below may carry into it */
 	u[0] = 0;
 	u[1] = HHALF(tmp.ui[H]);
 	u[2] = LHALF(tmp.ui[H]);
 	u[3] = HHALF(tmp.ui[L]);
 	u[4] = LHALF(tmp.ui[L]);
 	tmp.ul = vq;
 	v[1] = HHALF(tmp.ui[H]);
 	v[2] = LHALF(tmp.ui[H]);
 	v[3] = HHALF(tmp.ui[L]);
 	v[4] = LHALF(tmp.ui[L]);
 	/* drop leading zero digits of v by advancing the pointer */
 	for (n = 4; v[1] == 0; v++) {
 		if (--n == 1) {
 			uint rbj;     /* r*B+u[j] (not root boy jim) */
 			digit q1, q2, q3, q4;
 			/*
 			 * Change of plan, per exercise 16.
 			 *      r = 0;
 			 *      for j = 1..4:
 			 *              q[j] = floor((r*B + u[j]) / v),
 			 *              r = (r*B + u[j]) % v;
 			 * We unroll this completely here.
 			 */
 			/* v has not been advanced for this step yet, so the
 			 * single remaining divisor digit is v[2] */
 			t = v[2];       /* nonzero, by definition */
 			q1 = u[1] / t;
 			rbj = COMBINE(u[1] % t, u[2]);
 			q2 = rbj / t;
 			rbj = COMBINE(rbj % t, u[3]);
 			q3 = rbj / t;
 			rbj = COMBINE(rbj % t, u[4]);
 			q4 = rbj / t;
 			if (arq)
 			        *arq = rbj % t;
 			tmp.ui[H] = COMBINE(q1, q2);
 			tmp.ui[L] = COMBINE(q3, q4);
 			return (tmp.l);
 		}
 	}
 	/*
 	 * By adjusting q once we determine m, we can guarantee that
 	 * there is a complete four-digit quotient at &qspace[1] when
 	 * we finally stop.
 	 */
 	for (m = 4 - n; u[1] == 0; u++) {
 		m--;
 	}
 	/* zero the quotient digits that the main loop will not write */
 	for (i = 4 - m; --i >= 0;) {
 		q[i] = 0;
 	}
 	q += 4 - m;
 	/*
 	 * Here we run Program D, translated from MIX to C and acquiring
 	 * a few minor changes.
 	 *
 	 * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
 	 */
 	d = 0;
 	for (t = v[1]; t < B / 2; t <<= 1) {
 		d++;
 	}
 	if (d > 0) {
 		shl(&u[0], m + n, d);           /* u <<= d */
 		shl(&v[1], n - 1, d);           /* v <<= d */
 	}
 	/*
 	 * D2: j = 0.
 	 */
 	j = 0;
 	v1 = v[1];      /* for D3 -- note that v[1..n] are constant */
 	v2 = v[2];      /* for D3 */
 	do {
 		digit uj0, uj1, uj2;
 		/*
 		 * D3: Calculate qhat (\^q, in TeX notation).
 		 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
 		 * let rhat = (u[j]*B + u[j+1]) mod v[1].
 		 * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
 		 * decrement qhat and increase rhat correspondingly.
 		 * Note that if rhat >= B, v[2]*qhat < rhat*B.
 		 */
 		uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
 		uj1 = u[j + 1]; /* for D3 only */
 		uj2 = u[j + 2]; /* for D3 only */
 		if (uj0 == v1) {
 			/* start from qhat = B and jump straight into the
 			 * loop body below, which decrements it first */
 	        qhat = B;
 	        rhat = uj1;
 	        goto qhat_too_big;
 		} else {
 			uint nn = COMBINE(uj0, uj1);
 			qhat = nn / v1;
 			rhat = nn % v1;
 		}
 		while (v2 * qhat > COMBINE(rhat, uj2)) {
 qhat_too_big:
 			qhat--;
 			if ((rhat += v1) >= B) {
 				break;
 			}
 		}
 		/*
 		 * D4: Multiply and subtract.
 		 * The variable `t' holds any borrows across the loop.
 		 * We split this up so that we do not require v[0] = 0,
 		 * and to eliminate a final special case.
 		 */
 		for (t = 0, i = n; i > 0; i--) {
 			t = u[i + j] - v[i] * qhat - t;
 			u[i + j] = LHALF(t);
 			t = (B - HHALF(t)) & (B - 1);
 		}
 		t = u[j] - t;
 		u[j] = LHALF(t);
 		/*
 		 * D5: test remainder.
 		 * There is a borrow if and only if HHALF(t) is nonzero;
 		 * in that (rare) case, qhat was too large (by exactly 1).
 		 * Fix it by adding v[1..n] to u[j..j+n].
 		 */
 		if (HHALF(t)) {
 			qhat--;
 			for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
 			        t += u[i + j] + v[i];
 			        u[i + j] = LHALF(t);
 			        t = HHALF(t);
 			}
 			u[j] = LHALF(u[j] + t);
 		}
 		q[j] = qhat;
 	} while (++j <= m);             /* D7: loop on j. */
 	/*
 	 * If caller wants the remainder, we have to calculate it as
 	 * u[m..m+n] >> d (this is at most n digits and thus fits in
 	 * u[m+1..m+n], but we may need more source digits).
 	 */
 	if (arq) {
 		if (d) {
 			/* undo the D1 normalization shift, lowest digit first */
 			for (i = m + n; i > m; --i)
 			{
 				u[i] = (u[i] >> d) | LHALF(u[i - 1] << (HALF_BITS - d));
 			}
 			u[i] = 0;
 		}
 		/* reassemble from uspace, not from the advanced u pointer */
 		tmp.ui[H] = COMBINE(uspace[1], uspace[2]);
 		tmp.ui[L] = COMBINE(uspace[3], uspace[4]);
 		*arq = tmp.l;
 	}
 	/* the four quotient digits sit at qspace[1..4] */
 	tmp.ui[H] = COMBINE(qspace[1], qspace[2]);
 	tmp.ui[L] = COMBINE(qspace[3], qspace[4]);
 	return (tmp.l);
 }
 // Three-way unsigned compare of two 64-bit values.
 // Returns 0 when a < b, 1 when a == b, 2 when a > b.
 int ucmpdi2(ulong a, ulong b)
 {
 	uu lhs, rhs;
 	lhs.ul = a;
 	rhs.ul = b;
 	// the high words decide unless they are equal
 	if (lhs.ui[H] < rhs.ui[H])
 	{
 		return 0;
 	}
 	if (lhs.ui[H] > rhs.ui[H])
 	{
 		return 2;
 	}
 	// high words equal: the low words decide
 	if (lhs.ui[L] < rhs.ui[L])
 	{
 		return 0;
 	}
 	if (lhs.ui[L] > rhs.ui[L])
 	{
 		return 2;
 	}
 	return 1;
 }
 // C-linkage alias of ucmpdi2.
 extern(C) int __ucmpdi2(ulong a, ulong b)
 {
 	int order = ucmpdi2(a, b);
 	return order;
 }
 // Unsigned 64-bit division: returns a / b (the remainder is not requested).
 ulong udivdi3(ulong a, ulong b)
 {
 	ulong quotient = qdivrem(a, b, null);
 	return quotient;
 }
 // C-linkage alias of udivdi3.
 extern(C) ulong __udivdi3(ulong a, ulong b)
 {
 	ulong quotient = udivdi3(a, b);
 	return quotient;
 }
 // Unsigned 64-bit modulus: returns a % b, discarding the quotient.
 ulong umoddi3(ulong a, ulong b)
 {
 	ulong remainder;
 	ulong* remainderOut = &remainder;
 	qdivrem(a, b, remainderOut);
 	return remainder;
 }
 // C-linkage alias of umoddi3.
 extern(C) ulong __umoddi3(ulong a, ulong b)
 {
 	ulong remainder = umoddi3(a, b);
 	return remainder;
 }
 // Logical (zero-filling) right shift of a 64-bit value, done on its two
 // 32-bit words.
 long lshrdi3(long a, qshift_t shift)
 {
 	uu val;
 	val.l = a;
 	if (shift >= LONG_BITS)
 	{
 		// the low word comes entirely from the high word (or is empty)
 		if (shift >= QUAD_BITS)
 		{
 			val.ui[L] = 0;
 		}
 		else
 		{
 			val.ui[L] = val.ui[H] >> (shift - LONG_BITS);
 		}
 		val.ui[H] = 0;
 		return val.l;
 	}
 	if (shift > 0)
 	{
 		// bits leaving the high word enter the top of the low word
 		uint handedDown = val.ui[H] << (LONG_BITS - shift);
 		val.ui[L] = (val.ui[L] >> shift) | handedDown;
 		val.ui[H] >>= shift;
 	}
 	return val.l;
 }
 // C-linkage alias of lshrdi3.
 extern(C) long __lshrdi3(long a, qshift_t shift)
 {
 	long shifted = lshrdi3(a, shift);
 	return shifted;
 }
 // Left shift of a 64-bit value (arithmetic and logical left shift are the
 // same operation), done on its two 32-bit words.
 long ashldi3(long a, qshift_t shift)
 {
 	uu val;
 	val.l = a;
 	if (shift >= LONG_BITS)
 	{
 		// the high word comes entirely from the low word (or is empty)
 		if (shift >= QUAD_BITS)
 		{
 			val.ui[H] = 0;
 		}
 		else
 		{
 			val.ui[H] = val.ui[L] << (shift - LONG_BITS);
 		}
 		val.ui[L] = 0;
 		return val.l;
 	}
 	if (shift > 0)
 	{
 		// bits leaving the low word enter the bottom of the high word
 		uint handedUp = val.ui[L] >> (LONG_BITS - shift);
 		val.ui[H] = (val.ui[H] << shift) | handedUp;
 		val.ui[L] <<= shift;
 	}
 	return val.l;
 }
 // C-linkage alias of ashldi3.
 extern(C) long __ashldi3(long a, qshift_t shift)
 {
 	long shifted = ashldi3(a, shift);
 	return shifted;
 }
 // Arithmetic right shift of a signed 64-bit value, done on its two
 // 32-bit words.
 long ashrdi3(long a, qshift_t shift)
 {
 	uu val;
 	val.l = a;
 	if (shift >= LONG_BITS)
 	{
 		/* Build the `sign word' by smearing the high word rightward
 		   with the machine's own right shift. A shift by LONG_BITS
 		   is undefined, so it is done as (LONG_BITS-1) and then 1
 		   more. */
 		int signWord = (val.si[H] >> (LONG_BITS - 1)) >> 1;
 		if (shift >= QUAD_BITS)
 		{
 			val.ui[L] = signWord;
 		}
 		else
 		{
 			val.ui[L] = val.si[H] >> (shift - LONG_BITS);
 		}
 		val.ui[H] = signWord;
 		return val.l;
 	}
 	if (shift > 0)
 	{
 		// bits leaving the high word enter the top of the low word;
 		// the high word itself is shifted as a signed value
 		uint handedDown = val.ui[H] << (LONG_BITS - shift);
 		val.ui[L] = (val.ui[L] >> shift) | handedDown;
 		val.si[H] >>= shift;
 	}
 	return val.l;
 }
 // C-linkage alias of ashrdi3.
 extern(C) long __ashrdi3(long a, qshift_t shift)
 {
 	long shifted = ashrdi3(a, shift);
 	return shifted;
 }
 // Three-way signed compare of two 64-bit values.
 // Returns 0 when a < b, 1 when a == b, 2 when a > b.
 // Only the high word carries the sign; the low word compares unsigned.
 int cmpdi2(long a, long b)
 {
 	uu lhs, rhs;
 	lhs.l = a;
 	rhs.l = b;
 	// signed comparison of the high words
 	if (lhs.si[H] < rhs.si[H])
 	{
 		return 0;
 	}
 	if (lhs.si[H] > rhs.si[H])
 	{
 		return 2;
 	}
 	// high words equal: unsigned comparison of the low words
 	if (lhs.ui[L] < rhs.ui[L])
 	{
 		return 0;
 	}
 	if (lhs.ui[L] > rhs.ui[L])
 	{
 		return 2;
 	}
 	return 1;
 }
 // C-linkage alias of cmpdi2.
 extern(C) int __cmpdi2(long a, long b)
 {
 	int order = cmpdi2(a, b);
 	return order;
 }
 // Signed 64-bit division: divides the magnitudes with qdivrem and negates
 // the quotient when exactly one operand is negative.
 long divdi3(long a, long b)
 {
 	bool flipSign = (a < 0) != (b < 0);
 	ulong dividend = (a < 0) ? -cast(ulong)a : cast(ulong)a;
 	ulong divisor = (b < 0) ? -cast(ulong)b : cast(ulong)b;
 	ulong quotient = qdivrem(dividend, divisor, null);
 	if (flipSign)
 	{
 		return -quotient;
 	}
 	return quotient;
 }
 // C-linkage alias of divdi3.
 extern(C) long __divdi3(long a, long b)
 {
 	long quotient = divdi3(a, b);
 	return quotient;
 }
 // Signed 64-bit modulus: takes the remainder of the magnitudes with
 // qdivrem; the result is negated when the dividend a is negative (the
 // sign of b does not affect the result's sign).
 long moddi3(long a, long b)
 {
 	bool negative = (a < 0);
 	ulong dividend = negative ? -cast(ulong)a : cast(ulong)a;
 	ulong divisor = (b < 0) ? -cast(ulong)b : cast(ulong)b;
 	ulong remainder;
 	qdivrem(dividend, divisor, &remainder);
 	if (negative)
 	{
 		return -remainder;
 	}
 	return remainder;
 }
 // C-linkage alias of moddi3.
 extern(C) long __moddi3(long a, long b)
 {
 	long remainder = moddi3(a, b);
 	return remainder;
 }