diff --git a/include/stdbool.h b/include/stdbool.h new file mode 100644 index 0000000..403b956 --- /dev/null +++ b/include/stdbool.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000 Jeroen Ruigrok van der Werven + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef __bool_true_false_are_defined +#define __bool_true_false_are_defined 1 + +#ifndef __cplusplus + +#define false 0 +#define true 1 + +#define bool _Bool + +#endif /* !__cplusplus */ +#endif /* __bool_true_false_are_defined */ diff --git a/include/stdint.h b/include/stdint.h index 655e40f..8f9e6db 100644 --- a/include/stdint.h +++ b/include/stdint.h @@ -5,9 +5,26 @@ typedef signed char int8_t; typedef short int int16_t; typedef int int32_t; +typedef long long int64_t; typedef unsigned char uint8_t; typedef unsigned short int uint16_t; typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +#define INT8_C(x) x +#define UINT8_C(x) x##U + +#define INT16_C(x) x +#define UINT16_C(x) x##U + +#define INT32_C(x) x +#define UINT32_C(x) x##U + +#define INT64_C(x) x##LL +#define UINT64_C(x) x##ULL + +#define INTMAX_C(x) x##LL +#define UINTMAX_C(x) x##ULL #endif diff --git a/src/Makefile b/src/Makefile index 3fb4047..7fadd29 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,7 +9,8 @@ include $(TOPSRC)/target.mk # Programs that live in subdirectories, and have makefiles of their own. # SUBDIR = startup-$(MACHINE) libc libm libutil libtermlib libcurses \ - libvmf libwiznet libreadline libgpanel share cmd games man + libvmf libwiznet libreadline libgpanel share cmd games man \ + libclang all: $(SUBDIR) diff --git a/src/libclang/Makefile b/src/libclang/Makefile new file mode 100644 index 0000000..312e24c --- /dev/null +++ b/src/libclang/Makefile @@ -0,0 +1,31 @@ +TOPSRC = $(shell cd ../..; pwd) +include $(TOPSRC)/target.mk + +CFLAGS += ${DEFS} -Werror -Wall + +OBJS = adddf3.o \ + addsf3.o \ + divdf3.o \ + divsf3.o \ + comparedf2.o \ + extendsfdf2.o \ + fixdfsi.o \ + floatsidf.o \ + floatsisf.o \ + muldf3.o \ + mulsf3.o \ + subsf3.o + +all: ../libclang.a + +../libclang.a: ${OBJS} + $(AR) cru $@ ${OBJS} + $(RANLIB) $@ + +install: all +# ${INSTALLDIR} ${DESTDIR}/lib +# ${INSTALL} ../libclang.a ${DESTDIR}/lib/libclang.a +# $(RANLIB) ${DESTDIR}/lib/libclang.a + +clean: + rm -f ../libclang.a *.o *~ tags diff --git a/src/libclang/README.txt b/src/libclang/README.txt new file mode 100644 index 0000000..d66d725 --- /dev/null +++ b/src/libclang/README.txt @@ -0,0 +1,353 @@ +Compiler-RT +================================ + +This directory and its subdirectories contain source code for the compiler +support routines. + +Compiler-RT is open source software. You may freely distribute it under the +terms of the license agreement found in LICENSE.txt. + +================================ + +This is a replacement library for libgcc. Each function is contained +in its own file. Each function has a corresponding unit test under +test/Unit. + +A rudimentary script to test each file is in the file called +test/Unit/test. + +Here is the specification for this library: + +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc + +Please note that the libgcc specification explicitly mentions actual types of +arguments and returned values being expressed with machine modes. +In some cases particular types such as "int", "unsigned", "long long", etc. +may be specified just as examples there. + +Here is a synopsis of the contents of this library: + +typedef int32_t si_int; +typedef uint32_t su_int; + +typedef int64_t di_int; +typedef uint64_t du_int; + +// Integral bit manipulation + +di_int __ashldi3(di_int a, si_int b); // a << b +ti_int __ashlti3(ti_int a, si_int b); // a << b + +di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill) +ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill) +di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill) +ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill) + +int __clzsi2(si_int a); // count leading zeros +int __clzdi2(di_int a); // count leading zeros +int __clzti2(ti_int a); // count leading zeros +int __ctzsi2(si_int a); // count trailing zeros +int __ctzdi2(di_int a); // count trailing zeros +int __ctzti2(ti_int a); // count trailing zeros + +int __ffssi2(si_int a); // find least significant 1 bit +int __ffsdi2(di_int a); // find least significant 1 bit +int __ffsti2(ti_int a); // find least significant 1 bit + +int __paritysi2(si_int a); // bit parity +int __paritydi2(di_int a); // bit parity +int __parityti2(ti_int a); // bit parity + +int __popcountsi2(si_int a); // bit population +int __popcountdi2(di_int a); // bit population +int __popcountti2(ti_int a); // bit population + +uint32_t __bswapsi2(uint32_t a); // a byteswapped +uint64_t __bswapdi2(uint64_t a); // a byteswapped + +// Integral arithmetic + +di_int __negdi2 (di_int a); // -a +ti_int __negti2 (ti_int a); // -a +di_int __muldi3 (di_int a, di_int b); // a * b +ti_int __multi3 (ti_int a, ti_int b); // a * b +si_int __divsi3 (si_int a, si_int b); // a / b signed +di_int __divdi3 (di_int a, di_int b); // a / b signed +ti_int __divti3 (ti_int a, ti_int b); // a / b signed +su_int __udivsi3 (su_int n, su_int d); // a / b unsigned +du_int __udivdi3 (du_int a, du_int b); // a / b unsigned +tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned +si_int __modsi3 (si_int a, si_int b); // a % b signed +di_int __moddi3 (di_int a, di_int b); // a % b signed +ti_int __modti3 (ti_int a, ti_int b); // a % b signed +su_int __umodsi3 (su_int a, su_int b); // a % b unsigned +du_int __umoddi3 (du_int a, du_int b); // a % b unsigned +tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned +du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned +su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned +si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed +di_int __divmoddi4(di_int a, di_int b, di_int* rem); // a / b, *rem = a % b signed +ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem); // a / b, *rem = a % b signed + + + +// Integral arithmetic with trapping overflow + +si_int __absvsi2(si_int a); // abs(a) +di_int __absvdi2(di_int a); // abs(a) +ti_int __absvti2(ti_int a); // abs(a) + +si_int __negvsi2(si_int a); // -a +di_int __negvdi2(di_int a); // -a +ti_int __negvti2(ti_int a); // -a + +si_int __addvsi3(si_int a, si_int b); // a + b +di_int __addvdi3(di_int a, di_int b); // a + b +ti_int __addvti3(ti_int a, ti_int b); // a + b + +si_int __subvsi3(si_int a, si_int b); // a - b +di_int __subvdi3(di_int a, di_int b); // a - b +ti_int __subvti3(ti_int a, ti_int b); // a - b + +si_int __mulvsi3(si_int a, si_int b); // a * b +di_int __mulvdi3(di_int a, di_int b); // a * b +ti_int __mulvti3(ti_int a, ti_int b); // a * b + + +// Integral arithmetic which returns if overflow + +si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range +di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range +ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to + one if result not in signed range + + +// Integral comparison: a < b -> 0 +// a == b -> 1 +// a > b -> 2 + +si_int __cmpdi2 (di_int a, di_int b); +si_int __cmpti2 (ti_int a, ti_int b); +si_int __ucmpdi2(du_int a, du_int b); +si_int __ucmpti2(tu_int a, tu_int b); + +// Integral / floating point conversion + +di_int __fixsfdi( float a); +di_int __fixdfdi( double a); +di_int __fixxfdi(long double a); + +ti_int __fixsfti( float a); +ti_int __fixdfti( double a); +ti_int __fixxfti(long double a); +uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation + +su_int __fixunssfsi( float a); +su_int __fixunsdfsi( double a); +su_int __fixunsxfsi(long double a); + +du_int __fixunssfdi( float a); +du_int __fixunsdfdi( double a); +du_int __fixunsxfdi(long double a); + +tu_int __fixunssfti( float a); +tu_int __fixunsdfti( double a); +tu_int __fixunsxfti(long double a); +uint64_t __fixunstfdi(long double input); // ppc only + +float __floatdisf(di_int a); +double __floatdidf(di_int a); +long double __floatdixf(di_int a); +long double __floatditf(int64_t a); // ppc only + +float __floattisf(ti_int a); +double __floattidf(ti_int a); +long double __floattixf(ti_int a); + +float __floatundisf(du_int a); +double __floatundidf(du_int a); +long double __floatundixf(du_int a); +long double __floatunditf(uint64_t a); // ppc only + +float __floatuntisf(tu_int a); +double __floatuntidf(tu_int a); +long double __floatuntixf(tu_int a); + +// Floating point raised to integer power + +float __powisf2( float a, int b); // a ^ b +double __powidf2( double a, int b); // a ^ b +long double __powixf2(long double a, int b); // a ^ b +long double __powitf2(long double a, int b); // ppc only, a ^ b + +// Complex arithmetic + +// (a + ib) * (c + id) + + float _Complex __mulsc3( float a, float b, float c, float d); + double _Complex __muldc3(double a, double b, double c, double d); +long double _Complex __mulxc3(long double a, long double b, + long double c, long double d); +long double _Complex __multc3(long double a, long double b, + long double c, long double d); // ppc only + +// (a + ib) / (c + id) + + float _Complex __divsc3( float a, float b, float c, float d); + double _Complex __divdc3(double a, double b, double c, double d); +long double _Complex __divxc3(long double a, long double b, + long double c, long double d); +long double _Complex __divtc3(long double a, long double b, + long double c, long double d); // ppc only + + +// Runtime support + +// __clear_cache() is used to tell process that new instructions have been +// written to an address range. Necessary on processors that do not have +// a unified instruction and data cache. +void __clear_cache(void* start, void* end); + +// __enable_execute_stack() is used with nested functions when a trampoline +// function is written onto the stack and that page range needs to be made +// executable. +void __enable_execute_stack(void* addr); + +// __gcc_personality_v0() is normally only called by the system unwinder. +// C code (as opposed to C++) normally does not need a personality function +// because there are no catch clauses or destructors to be run. But there +// is a C language extension __attribute__((cleanup(func))) which marks local +// variables as needing the cleanup function "func" to be run when the +// variable goes out of scope. That includes when an exception is thrown, +// so a personality handler is needed. +_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context); + +// for use with some implementations of assert() in +void __eprintf(const char* format, const char* assertion_expression, + const char* line, const char* file); + +// for systems with emulated thread local storage +void* __emutls_get_address(struct __emutls_control*); + + +// Power PC specific functions + +// There is no C interface to the saveFP/restFP functions. They are helper +// functions called by the prolog and epilog of functions that need to save +// a number of non-volatile float point registers. +saveFP +restFP + +// PowerPC has a standard template for trampoline functions. This function +// generates a custom trampoline function with the specific realFunc +// and localsPtr values. +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr); + +// adds two 128-bit double-double precision values ( x + y ) +long double __gcc_qadd(long double x, long double y); + +// subtracts two 128-bit double-double precision values ( x - y ) +long double __gcc_qsub(long double x, long double y); + +// multiples two 128-bit double-double precision values ( x * y ) +long double __gcc_qmul(long double x, long double y); + +// divides two 128-bit double-double precision values ( x / y ) +long double __gcc_qdiv(long double a, long double b); + + +// ARM specific functions + +// There is no C interface to the switch* functions. These helper functions +// are only needed by Thumb1 code for efficient switch table generation. +switch16 +switch32 +switch8 +switchu8 + +// There is no C interface to the *_vfp_d8_d15_regs functions. There are +// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use +// SJLJ for exceptions, each function with a catch clause or destuctors needs +// to save and restore all registers in it prolog and epliog. But there is +// no way to access vector and high float registers from thumb1 code, so the +// compiler must add call outs to these helper functions in the prolog and +// epilog. +restore_vfp_d8_d15_regs +save_vfp_d8_d15_regs + + +// Note: long ago ARM processors did not have floating point hardware support. +// Floating point was done in software and floating point parameters were +// passed in integer registers. When hardware support was added for floating +// point, new *vfp functions were added to do the same operations but with +// floating point parameters in floating point registers. + +// Undocumented functions + +float __addsf3vfp(float a, float b); // Appears to return a + b +double __adddf3vfp(double a, double b); // Appears to return a + b +float __divsf3vfp(float a, float b); // Appears to return a / b +double __divdf3vfp(double a, double b); // Appears to return a / b +int __eqsf2vfp(float a, float b); // Appears to return one + // iff a == b and neither is NaN. +int __eqdf2vfp(double a, double b); // Appears to return one + // iff a == b and neither is NaN. +double __extendsfdf2vfp(float a); // Appears to convert from + // float to double. +int __fixdfsivfp(double a); // Appears to convert from + // double to int. +int __fixsfsivfp(float a); // Appears to convert from + // float to int. +unsigned int __fixunssfsivfp(float a); // Appears to convert from + // float to unsigned int. +unsigned int __fixunsdfsivfp(double a); // Appears to convert from + // double to unsigned int. +double __floatsidfvfp(int a); // Appears to convert from + // int to double. +float __floatsisfvfp(int a); // Appears to convert from + // int to float. +double __floatunssidfvfp(unsigned int a); // Appears to convert from + // unisgned int to double. +float __floatunssisfvfp(unsigned int a); // Appears to convert from + // unisgned int to float. +int __gedf2vfp(double a, double b); // Appears to return __gedf2 + // (a >= b) +int __gesf2vfp(float a, float b); // Appears to return __gesf2 + // (a >= b) +int __gtdf2vfp(double a, double b); // Appears to return __gtdf2 + // (a > b) +int __gtsf2vfp(float a, float b); // Appears to return __gtsf2 + // (a > b) +int __ledf2vfp(double a, double b); // Appears to return __ledf2 + // (a <= b) +int __lesf2vfp(float a, float b); // Appears to return __lesf2 + // (a <= b) +int __ltdf2vfp(double a, double b); // Appears to return __ltdf2 + // (a < b) +int __ltsf2vfp(float a, float b); // Appears to return __ltsf2 + // (a < b) +double __muldf3vfp(double a, double b); // Appears to return a * b +float __mulsf3vfp(float a, float b); // Appears to return a * b +int __nedf2vfp(double a, double b); // Appears to return __nedf2 + // (a != b) +double __negdf2vfp(double a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +double __subdf3vfp(double a, double b); // Appears to return a - b +float __subsf3vfp(float a, float b); // Appears to return a - b +float __truncdfsf2vfp(double a); // Appears to convert from + // double to float. +int __unorddf2vfp(double a, double b); // Appears to return __unorddf2 +int __unordsf2vfp(float a, float b); // Appears to return __unordsf2 + + +Preconditions are listed for each function at the definition when there are any. +Any preconditions reflect the specification at +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc. + +Assumptions are listed in "int_lib.h", and in individual files. Where possible +assumptions are checked at compile time. diff --git a/src/libclang/adddf3.c b/src/libclang/adddf3.c new file mode 100644 index 0000000..26f11bf --- /dev/null +++ b/src/libclang/adddf3.c @@ -0,0 +1,24 @@ +//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_add_impl.inc" + +COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_dadd(double a, double b) { return __adddf3(a, b); } +#else +COMPILER_RT_ALIAS(__adddf3, __aeabi_dadd) +#endif +#endif diff --git a/src/libclang/addsf3.c b/src/libclang/addsf3.c new file mode 100644 index 0000000..9f1d517 --- /dev/null +++ b/src/libclang/addsf3.c @@ -0,0 +1,24 @@ +//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_add_impl.inc" + +COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_fadd(float a, float b) { return __addsf3(a, b); } +#else +COMPILER_RT_ALIAS(__addsf3, __aeabi_fadd) +#endif +#endif diff --git a/src/libclang/comparedf2.c b/src/libclang/comparedf2.c new file mode 100644 index 0000000..58290d8 --- /dev/null +++ b/src/libclang/comparedf2.c @@ -0,0 +1,151 @@ +//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqdf2 __gedf2 __unorddf2 +// __ledf2 __gtdf2 +// __ltdf2 +// __nedf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __ledf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gedf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unorddf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 }; + +COMPILER_RT_ABI enum LE_RESULT __ledf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) + return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) + return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + else { + if (aInt > bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +COMPILER_RT_ALIAS(__ledf2, __cmpdf2) +#endif +COMPILER_RT_ALIAS(__ledf2, __eqdf2) +COMPILER_RT_ALIAS(__ledf2, __ltdf2) +COMPILER_RT_ALIAS(__ledf2, __nedf2) + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT __gedf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) + return GE_UNORDERED; + if ((aAbs | bAbs) == 0) + return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } else { + if (aInt > bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } +} + +COMPILER_RT_ALIAS(__gedf2, __gtdf2) + +COMPILER_RT_ABI int +__unorddf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { return __unorddf2(a, b); } +#else +COMPILER_RT_ALIAS(__unorddf2, __aeabi_dcmpun) +#endif +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) +// The alias mechanism doesn't work on Windows except for MinGW, so emit +// wrapper functions. +int __eqdf2(fp_t a, fp_t b) { return __ledf2(a, b); } +int __ltdf2(fp_t a, fp_t b) { return __ledf2(a, b); } +int __nedf2(fp_t a, fp_t b) { return __ledf2(a, b); } +int __gtdf2(fp_t a, fp_t b) { return __gedf2(a, b); } +#endif diff --git a/src/libclang/divdf3.c b/src/libclang/divdf3.c new file mode 100644 index 0000000..4c11759 --- /dev/null +++ b/src/libclang/divdf3.c @@ -0,0 +1,29 @@ +//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION + +#define NUMBER_OF_HALF_ITERATIONS 3 +#define NUMBER_OF_FULL_ITERATIONS 1 + +#include "fp_div_impl.inc" + +COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { return __divdf3(a, b); } +#else +COMPILER_RT_ALIAS(__divdf3, __aeabi_ddiv) +#endif +#endif diff --git a/src/libclang/divsf3.c b/src/libclang/divsf3.c new file mode 100644 index 0000000..5744c01 --- /dev/null +++ b/src/libclang/divsf3.c @@ -0,0 +1,30 @@ +//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION + +#define NUMBER_OF_HALF_ITERATIONS 0 +#define NUMBER_OF_FULL_ITERATIONS 3 +#define USE_NATIVE_FULL_ITERATIONS + +#include "fp_div_impl.inc" + +COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { return __divsf3(a, b); } +#else +COMPILER_RT_ALIAS(__divsf3, __aeabi_fdiv) +#endif +#endif diff --git a/src/libclang/extendsfdf2.c b/src/libclang/extendsfdf2.c new file mode 100644 index 0000000..8132d57 --- /dev/null +++ b/src/libclang/extendsfdf2.c @@ -0,0 +1,21 @@ +//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_SINGLE +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_f2d(float a) { return __extendsfdf2(a); } +#else +COMPILER_RT_ALIAS(__extendsfdf2, __aeabi_f2d) +#endif +#endif diff --git a/src/libclang/fixdfsi.c b/src/libclang/fixdfsi.c new file mode 100644 index 0000000..f546499 --- /dev/null +++ b/src/libclang/fixdfsi.c @@ -0,0 +1,23 @@ +//===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { return __fixdfsi(a); } +#else +COMPILER_RT_ALIAS(__fixdfsi, __aeabi_d2iz) +#endif +#endif diff --git a/src/libclang/floatsidf.c b/src/libclang/floatsidf.c new file mode 100644 index 0000000..28cf32f --- /dev/null +++ b/src/libclang/floatsidf.c @@ -0,0 +1,57 @@ +//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t __floatsidf(si_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. Extra + // cast to unsigned int is necessary to get the correct behavior for + // the input INT_MIN. + const int shift = significandBits - exponent; + result = (rep_t)(su_int)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_i2d(si_int a) { return __floatsidf(a); } +#else +COMPILER_RT_ALIAS(__floatsidf, __aeabi_i2d) +#endif +#endif diff --git a/src/libclang/floatsisf.c b/src/libclang/floatsisf.c new file mode 100644 index 0000000..fe06040 --- /dev/null +++ b/src/libclang/floatsisf.c @@ -0,0 +1,65 @@ +//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t __floatsisf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) + result++; + if (round == signBit) + result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_i2f(int a) { return __floatsisf(a); } +#else +COMPILER_RT_ALIAS(__floatsisf, __aeabi_i2f) +#endif +#endif diff --git a/src/libclang/fp_add_impl.inc b/src/libclang/fp_add_impl.inc new file mode 100644 index 0000000..ab63213 --- /dev/null +++ b/src/libclang/fp_add_impl.inc @@ -0,0 +1,172 @@ +//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float addition with the IEEE-754 default rounding +// (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" +#include "fp_mode.h" + +static __inline fp_t __addXf3__(fp_t a, fp_t b) { + rep_t aRep = toRep(a); + rep_t bRep = toRep(b); + const rep_t aAbs = aRep & absMask; + const rep_t bAbs = bRep & absMask; + + // Detect if a or b is zero, infinity, or NaN. + if (aAbs - REP_C(1) >= infRep - REP_C(1) || + bAbs - REP_C(1) >= infRep - REP_C(1)) { + // NaN + anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything + NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // +/-infinity + -/+infinity = qNaN + if ((toRep(a) ^ toRep(b)) == signBit) + return fromRep(qnanRep); + // +/-infinity + anything remaining = +/- infinity + else + return a; + } + + // anything remaining + +/-infinity = +/-infinity + if (bAbs == infRep) + return b; + + // zero + anything = anything + if (!aAbs) { + // We need to get the sign right for zero + zero. + if (!bAbs) + return fromRep(toRep(a) & toRep(b)); + else + return b; + } + + // anything + zero = anything + if (!bAbs) + return a; + } + + // Swap a and b if necessary so that a has the larger absolute value. + if (bAbs > aAbs) { + const rep_t temp = aRep; + aRep = bRep; + bRep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + int aExponent = aRep >> significandBits & maxExponent; + int bExponent = bRep >> significandBits & maxExponent; + rep_t aSignificand = aRep & significandMask; + rep_t bSignificand = bRep & significandMask; + + // Normalize any denormals, and adjust the exponent accordingly. + if (aExponent == 0) + aExponent = normalize(&aSignificand); + if (bExponent == 0) + bExponent = normalize(&bSignificand); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction. Otherwise, we + // perform addition. + const rep_t resultSign = aRep & signBit; + const bool subtraction = (aRep ^ bRep) & signBit; + + // Shift the significands to give us round, guard and sticky, and set the + // implicit significand bit. If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything. + aSignificand = (aSignificand | implicitBit) << 3; + bSignificand = (bSignificand | implicitBit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const unsigned int align = aExponent - bExponent; + if (align) { + if (align < typeWidth) { + const bool sticky = (bSignificand << (typeWidth - align)) != 0; + bSignificand = bSignificand >> align | sticky; + } else { + bSignificand = 1; // Set the sticky bit. b is known to be non-zero. + } + } + if (subtraction) { + aSignificand -= bSignificand; + // If a == -b, return +zero. + if (aSignificand == 0) + return fromRep(0); + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent. + if (aSignificand < implicitBit << 3) { + const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + aSignificand <<= shift; + aExponent -= shift; + } + } else /* addition */ { + aSignificand += bSignificand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. + if (aSignificand & implicitBit << 4) { + const bool sticky = aSignificand & 1; + aSignificand = aSignificand >> 1 | sticky; + aExponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity. + if (aExponent >= maxExponent) + return fromRep(infRep | resultSign); + + if (aExponent <= 0) { + // The result is denormal before rounding. The exponent is zero and we + // need to shift the significand. + const int shift = 1 - aExponent; + const bool sticky = (aSignificand << (typeWidth - shift)) != 0; + aSignificand = aSignificand >> shift | sticky; + aExponent = 0; + } + + // Low three bits are round, guard, and sticky. + const int roundGuardSticky = aSignificand & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + rep_t result = aSignificand >> 3 & significandMask; + + // Insert the exponent and sign. + result |= (rep_t)aExponent << significandBits; + result |= resultSign; + + // Perform the final rounding. The result may overflow to infinity, but + // that is the correct result in that case. + switch (__fe_getround()) { + case FE_TONEAREST: + if (roundGuardSticky > 0x4) + result++; + if (roundGuardSticky == 0x4) + result += result & 1; + break; + case FE_DOWNWARD: + if (resultSign && roundGuardSticky) result++; + break; + case FE_UPWARD: + if (!resultSign && roundGuardSticky) result++; + break; + case FE_TOWARDZERO: + break; + } + if (roundGuardSticky) + __fe_raise_inexact(); + return fromRep(result); +} diff --git a/src/libclang/fp_div_impl.inc b/src/libclang/fp_div_impl.inc new file mode 100644 index 0000000..29bcd19 --- /dev/null +++ b/src/libclang/fp_div_impl.inc @@ -0,0 +1,419 @@ +//===-- fp_div_impl.inc - Floating point division -----------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float division with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +// The __divXf3__ function implements Newton-Raphson floating point division. +// It uses 3 iterations for float32, 4 for float64 and 5 for float128, +// respectively. Due to number of significant bits being roughly doubled +// every iteration, the two modes are supported: N full-width iterations (as +// it is done for float32 by default) and (N-1) half-width iteration plus one +// final full-width iteration. It is expected that half-width integer +// operations (w.r.t rep_t size) can be performed faster for some hardware but +// they require error estimations to be computed separately due to larger +// computational errors caused by truncating intermediate results. + +// Half the bit-size of rep_t +#define HW (typeWidth / 2) +// rep_t-sized bitmask with lower half of bits set to ones +#define loMask (REP_C(-1) >> HW) + +#if NUMBER_OF_FULL_ITERATIONS < 1 +#error At least one full iteration is required +#endif + +static __inline fp_t __divXf3__(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else + return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + int writtenExponent = (aExponent - bExponent + scale) + exponentBias; + + const rep_t b_UQ1 = bSignificand << (typeWidth - significandBits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. + // The max error for this approximation is achieved at endpoints, so + // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., + // which is about 4.5 bits. + // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... + + // Then, refine the reciprocal estimate using a quadratically converging + // Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) + // + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). + // + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. + // + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the handware and whether single/double/quad precision + // is selected. + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! +#if NUMBER_OF_HALF_ITERATIONS > 0 + // Starting with (n-1) half-width iterations + const half_rep_t b_UQ1_hw = bSignificand >> (significandBits + 1 - HW); + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. +#if defined(SINGLE_PRECISION) + // Use 16-bit initial estimation in case we are using half-width iterations + // for float32 division. This is expected to be useful for some 16-bit + // targets. Not used by default as it requires performing more work during + // rounding and would hardly help on regular 32- or 64-bit targets. + const half_rep_t C_hw = HALF_REP_C(0x7504); +#else + // HW is at least 32. Shifting into the highest bits if needed. + const half_rep_t C_hw = HALF_REP_C(0x7504F333) << (HW - 32); +#endif + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. + half_rep_t x_UQ0_hw = C_hw - (b_UQ1_hw /* exact b_hw/2 as UQ0.HW */); + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increses until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreses to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + half_rep_t corr_UQ1_hw = 0 - ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW); + + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. + // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one have to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually round up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + x_UQ0_hw = (rep_t)x_UQ0_hw * corr_UQ1_hw >> (HW - 1); + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. + + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + }) + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + + // Account for possible overflow (see above). For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_UQ0_hw -= 1U; + rep_t x_UQ0 = (rep_t)x_UQ0_hw << HW; + x_UQ0 -= 1U; + +#else + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n + const rep_t C = REP_C(0x7504F333) << (typeWidth - 32); + rep_t x_UQ0 = C - b_UQ1; + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 +#endif + + // Error estimations for full-precision iterations are calculated just + // as above, but with U := 2^-W and taking extra decrementing into account. + // We need at least one such iteration. + +#ifdef USE_NATIVE_FULL_ITERATIONS + REPEAT_N_TIMES(NUMBER_OF_FULL_ITERATIONS, { + rep_t corr_UQ1 = 0 - ((twice_rep_t)x_UQ0 * b_UQ1 >> typeWidth); + x_UQ0 = (twice_rep_t)x_UQ0 * corr_UQ1 >> (typeWidth - 1); + }) +#else +#if NUMBER_OF_FULL_ITERATIONS != 1 +#error Only a single emulated full iteration is supported +#endif +#if !(NUMBER_OF_HALF_ITERATIONS > 0) + // Cannot normally reach here: only one full-width iteration is requested and + // the total number of iterations should be at least 3 even for float32. +#error Check NUMBER_OF_HALF_ITERATIONS, NUMBER_OF_FULL_ITERATIONS and USE_NATIVE_FULL_ITERATIONS. +#endif + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. + rep_t blo = b_UQ1 & loMask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] + rep_t corr_UQ1 = 0U - ( (rep_t)x_UQ0_hw * b_UQ1_hw + + ((rep_t)x_UQ0_hw * blo >> HW) + - REP_C(1)); // account for *possible* carry + rep_t lo_corr = corr_UQ1 & loMask; + rep_t hi_corr = corr_UQ1 >> HW; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + x_UQ0 = ((rep_t)x_UQ0_hw * hi_corr << 1) + + ((rep_t)x_UQ0_hw * lo_corr >> (HW - 1)) + - REP_C(2); // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + x_UQ0 -= 1U; + // ... and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + + // Then like for the half-width iterations: + // With 0 <= eps1, eps2 < 2^-W + // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b + // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] + // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] +#endif + + // Finally, account for possible overflow, as explained above. + x_UQ0 -= 2U; + + // u_n for different precisions (with N-1 half-width iterations): + // W0 is the precision of C + // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW + + // Estimated with bc: + // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } + // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } + // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } + // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } + + // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) + // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 + // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 + // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 + // u_3 | < 7.31 | | < 7.31 | < 27054456580 + // u_4 | | | | < 80.4 + // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 + + // Add 2 to U_N due to final decrement. + +#if defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 2 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(74) +#elif defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 +#define RECIPROCAL_PRECISION REP_C(10) +#elif defined(DOUBLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(220) +#elif defined(QUAD_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(13922) +#else +#error Invalid number of iterations +#endif + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + x_UQ0 -= RECIPROCAL_PRECISION; + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + rep_t quotient_UQ1, dummy; + wideMultiply(x_UQ0, aSignificand << 1, "ient_UQ1, &dummy); + // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), + // adjust it to be in [1.0, 2.0) as UQ1.SB. + rep_t residualLo; + if (quotient_UQ1 < (implicitBit << 1)) { + // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, + // effectively doubling its value as well as its error estimation. + residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand; + writtenExponent -= 1; + aSignificand <<= 1; + } else { + // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it + // to UQ1.SB by right shifting by 1. Least significant bit is omitted. + quotient_UQ1 >>= 1; + residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand; + } + // NB: residualLo is calculated above for the normal result case. + // It is re-computed on denormal path that is expected to be not so + // performance-sensitive. + + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented). + // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. + // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB + // (8*P) * 2^-W < 0.5 * 2^-SB + // P < 2^(W-4-SB) + // Generally, for at most R NextAfter() to be enough, + // P < (2*R - 1) * 2^(W-4-SB) + // For f32 (0+3): 10 < 32 (OK) + // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required + // For f64: 220 < 256 (OK) + // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) + + // If we have overflowed the exponent, return infinity + if (writtenExponent >= maxExponent) + return fromRep(infRep | quotientSign); + + // Now, quotient_UQ1_SB <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + rep_t absResult; + if (writtenExponent > 0) { + // Clear the implicit bit + absResult = quotient_UQ1 & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + residualLo <<= 1; + } else { + // Prevent shift amount from being negative + if (significandBits + writtenExponent < 0) + return fromRep(quotientSign); + + absResult = quotient_UQ1 >> (-writtenExponent + 1); + + // multiplied by two to prevent shift amount to be negative + residualLo = (aSignificand << (significandBits + writtenExponent)) - (absResult * bSignificand << 1); + } + + // Round + residualLo += absResult & 1; // tie to even + // The above line conditionally turns the below LT comparison into LTE + absResult += residualLo > bSignificand; +#if defined(QUAD_PRECISION) || (defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS > 0) + // Do not round Infinity to NaN + absResult += absResult < infRep && residualLo > (2 + 1) * bSignificand; +#endif +#if defined(QUAD_PRECISION) + absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand; +#endif + return fromRep(absResult | quotientSign); +} diff --git a/src/libclang/fp_extend.h b/src/libclang/fp_extend.h new file mode 100644 index 0000000..aad4436 --- /dev/null +++ b/src/libclang/fp_extend.h @@ -0,0 +1,99 @@ +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_EXTEND_HEADER +#define FP_EXTEND_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; +#define src_rep_t_clz clzsi + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; +static __inline int src_rep_t_clz(src_rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); +#endif +} + +#elif defined SRC_HALF +#ifdef COMPILER_RT_HAS_FLOAT16 +typedef _Float16 src_t; +#else +typedef uint16_t src_t; +#endif +typedef uint16_t src_rep_t; +#define SRC_REP_C UINT16_C +static const int srcSigBits = 10; +#define src_rep_t_clz __builtin_clz + +#else +#error Source should be half, single, or double precision! +#endif // end source precision + +#if defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_QUAD +typedef long double dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstSigBits = 112; + +#else +#error Destination should be single, double, or quad precision! +#endif // end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. + +static __inline src_rep_t srcToRep(src_t x) { + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} +// End helper routines. Conversion implementation follows. + +#endif // FP_EXTEND_HEADER diff --git a/src/libclang/fp_extend_impl.inc b/src/libclang/fp_extend_impl.inc new file mode 100644 index 0000000..d1c9c02 --- /dev/null +++ b/src/libclang/fp_extend_impl.inc @@ -0,0 +1,107 @@ +//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a narrower to a wider +// IEEE-754 floating-point type. The constants and types defined following the +// includes below parameterize the conversion. +// +// It does not support types that don't use the usual IEEE-754 interchange +// formats; specifically, some work would be needed to adapt it to +// (for example) the Intel 80-bit format or PowerPC double-double format. +// +// Note please, however, that this implementation is only intended to support +// *widening* operations; if you need to convert to a *narrower* floating-point +// type (e.g. double -> float), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. You also may +// run into trouble finding an appropriate CLZ function for wide source types; +// you will likely need to roll your own on some platforms. +// +// Finally, the following assumptions are made: +// +// 1. Floating-point types and integer types have the same endianness on the +// target platform. +// +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. +// +//===----------------------------------------------------------------------===// + +#include "fp_extend.h" + +static __inline dst_t __extendXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcBits = sizeof(src_t) * CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; + + // Break a into a sign and representation of the absolute value. + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted + // to (signed) int. To avoid that, explicitly cast to src_rep_t. + if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); + absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; + } + + else if (aAbs >= srcInfinity) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); + absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + } + + else if (aAbs) { + // a is denormal. + // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); + absResult ^= dstMinNormal; + const int resultExponent = dstExpBias - srcExpBias - scale + 1; + absResult |= (dst_rep_t)resultExponent << dstSigBits; + } + + else { + // a is zero. + absResult = 0; + } + + // Apply the signbit to the absolute value. + const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); + return dstFromRep(result); +} diff --git a/src/libclang/fp_fixint_impl.inc b/src/libclang/fp_fixint_impl.inc new file mode 100644 index 0000000..2196d71 --- /dev/null +++ b/src/libclang/fp_fixint_impl.inc @@ -0,0 +1,40 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements float to integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixint_t __fixint(fp_t a) { + const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); + const fixint_t fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand parts. + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const fixint_t sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If exponent is negative, the result is zero. + if (exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) + return sign == 1 ? fixint_max : fixint_min; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((fixint_t)significand << (exponent - significandBits)); +} diff --git a/src/libclang/fp_lib.h b/src/libclang/fp_lib.h new file mode 100644 index 0000000..f22feaf --- /dev/null +++ b/src/libclang/fp_lib.h @@ -0,0 +1,326 @@ +//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a configuration header for soft-float routines in compiler-rt. +// This file does not provide any part of the compiler-rt interface, but defines +// many useful constants and utility routines that are used in the +// implementation of the soft-float routines in compiler-rt. +// +// Assumes that float, double and long double correspond to the IEEE-754 +// binary32, binary64 and binary 128 types, respectively, and that integer +// endianness matches floating point endianness on the target platform. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_LIB_HEADER +#define FP_LIB_HEADER + +#include "int_lib.h" +#include "int_math.h" +#include +#include +#include + +// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in +// 32-bit mode. +#if defined(__FreeBSD__) && defined(__i386__) +#include +#if __FreeBSD_version < 903000 // v9.3 +#define uint64_t unsigned long long +#define int64_t long long +#undef UINT64_C +#define UINT64_C(c) (c##ULL) +#endif +#endif + +#if defined SINGLE_PRECISION + +typedef uint16_t half_rep_t; +typedef uint32_t rep_t; +typedef uint64_t twice_rep_t; +typedef int32_t srep_t; +typedef float fp_t; +#define HALF_REP_C UINT16_C +#define REP_C UINT32_C +#define significandBits 23 + +static __inline int rep_clz(rep_t a) { return clzsi(a); } + +// 32x32 --> 64 bit multiply +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product = (uint64_t)a * b; + *hi = product >> 32; + *lo = product; +} +COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); + +#elif defined DOUBLE_PRECISION + +typedef uint32_t half_rep_t; +typedef uint64_t rep_t; +typedef int64_t srep_t; +typedef double fp_t; +#define HALF_REP_C UINT32_C +#define REP_C UINT64_C +#define significandBits 52 + +static __inline int rep_clz(rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return clzsi(a >> 32); + else + return 32 + clzsi(a & REP_C(0xffffffff)); +#endif +} + +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} +#undef loWord +#undef hiWord + +COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); + +#elif defined QUAD_PRECISION +#if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__) +#define CRT_LDBL_128BIT +typedef uint64_t half_rep_t; +typedef __uint128_t rep_t; +typedef __int128_t srep_t; +typedef long double fp_t; +#define HALF_REP_C UINT64_C +#define REP_C (__uint128_t) +// Note: Since there is no explicit way to tell compiler the constant is a +// 128-bit integer, we let the constant be casted to 128-bit integer +#define significandBits 112 + +static __inline int rep_clz(rep_t a) { + const union { + __uint128_t ll; +#if _YUGA_BIG_ENDIAN + struct { + uint64_t high, low; + } s; +#else + struct { + uint64_t low, high; + } s; +#endif + } uu = {.ll = a}; + + uint64_t word; + uint64_t add; + + if (uu.s.high) { + word = uu.s.high; + add = 0; + } else { + word = uu.s.low; + add = 64; + } + return __builtin_clzll(word) + add; +} + +#define Word_LoMask UINT64_C(0x00000000ffffffff) +#define Word_HiMask UINT64_C(0xffffffff00000000) +#define Word_FullMask UINT64_C(0xffffffffffffffff) +#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) +#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) +#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask) +#define Word_4(a) (uint64_t)(a & Word_LoMask) + +// 128x128 -> 256 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + + const uint64_t product11 = Word_1(a) * Word_1(b); + const uint64_t product12 = Word_1(a) * Word_2(b); + const uint64_t product13 = Word_1(a) * Word_3(b); + const uint64_t product14 = Word_1(a) * Word_4(b); + const uint64_t product21 = Word_2(a) * Word_1(b); + const uint64_t product22 = Word_2(a) * Word_2(b); + const uint64_t product23 = Word_2(a) * Word_3(b); + const uint64_t product24 = Word_2(a) * Word_4(b); + const uint64_t product31 = Word_3(a) * Word_1(b); + const uint64_t product32 = Word_3(a) * Word_2(b); + const uint64_t product33 = Word_3(a) * Word_3(b); + const uint64_t product34 = Word_3(a) * Word_4(b); + const uint64_t product41 = Word_4(a) * Word_1(b); + const uint64_t product42 = Word_4(a) * Word_2(b); + const uint64_t product43 = Word_4(a) * Word_3(b); + const uint64_t product44 = Word_4(a) * Word_4(b); + + const __uint128_t sum0 = (__uint128_t)product44; + const __uint128_t sum1 = (__uint128_t)product34 + (__uint128_t)product43; + const __uint128_t sum2 = + (__uint128_t)product24 + (__uint128_t)product33 + (__uint128_t)product42; + const __uint128_t sum3 = (__uint128_t)product14 + (__uint128_t)product23 + + (__uint128_t)product32 + (__uint128_t)product41; + const __uint128_t sum4 = + (__uint128_t)product13 + (__uint128_t)product22 + (__uint128_t)product31; + const __uint128_t sum5 = (__uint128_t)product12 + (__uint128_t)product21; + const __uint128_t sum6 = (__uint128_t)product11; + + const __uint128_t r0 = (sum0 & Word_FullMask) + ((sum1 & Word_LoMask) << 32); + const __uint128_t r1 = (sum0 >> 64) + ((sum1 >> 32) & Word_FullMask) + + (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask); + + *lo = r0 + (r1 << 64); + *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 + + (sum5 << 32) + (sum6 << 64); +} +#undef Word_1 +#undef Word_2 +#undef Word_3 +#undef Word_4 +#undef Word_HiMask +#undef Word_LoMask +#undef Word_FullMask +#endif // __LDBL_MANT_DIG__ == 113 && __SIZEOF_INT128__ +#else +#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. +#endif + +#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \ + defined(CRT_LDBL_128BIT) +#define typeWidth (sizeof(rep_t) * CHAR_BIT) +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + +static __inline rep_t toRep(fp_t x) { + const union { + fp_t f; + rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static __inline fp_t fromRep(rep_t x) { + const union { + fp_t f; + rep_t i; + } rep = {.i = x}; + return rep.f; +} + +static __inline int normalize(rep_t *significand) { + const int shift = rep_clz(*significand) - rep_clz(implicitBit); + *significand <<= shift; + return 1 - shift; +} + +static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { + *hi = *hi << count | *lo >> (typeWidth - count); + *lo = *lo << count; +} + +static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, + unsigned int count) { + if (count < typeWidth) { + const bool sticky = (*lo << (typeWidth - count)) != 0; + *lo = *hi << (typeWidth - count) | *lo >> count | sticky; + *hi = *hi >> count; + } else if (count < 2 * typeWidth) { + const bool sticky = *hi << (2 * typeWidth - count) | *lo; + *lo = *hi >> (count - typeWidth) | sticky; + *hi = 0; + } else { + const bool sticky = *hi | *lo; + *lo = sticky; + *hi = 0; + } +} + +// Implements logb methods (logb, logbf, logbl) for IEEE-754. This avoids +// pulling in a libm dependency from compiler-rt, but is not meant to replace +// it (i.e. code calling logb() should get the one from libm, not this), hence +// the __compiler_rt prefix. +static __inline fp_t __compiler_rt_logbX(fp_t x) { + rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + // Abnormal cases: + // 1) +/- inf returns +inf; NaN returns NaN + // 2) 0.0 returns -inf + if (exp == maxExponent) { + if (((rep & signBit) == 0) || (x != x)) { + return x; // NaN or +inf: return x + } else { + return -x; // -inf: return -x + } + } else if (x == 0.0) { + // 0.0: return -inf + return fromRep(infRep | signBit); + } + + if (exp != 0) { + // Normal number + return exp - exponentBias; // Unbias exponent + } else { + // Subnormal number; normalize and repeat + rep &= absMask; + const int shift = 1 - normalize(&rep); + exp = (rep & exponentMask) >> significandBits; + return exp - exponentBias - shift; // Unbias exponent + } +} +#endif + +#if defined(SINGLE_PRECISION) +static __inline fp_t __compiler_rt_logbf(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(DOUBLE_PRECISION) +static __inline fp_t __compiler_rt_logb(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(QUAD_PRECISION) +#if defined(CRT_LDBL_128BIT) +static __inline fp_t __compiler_rt_logbl(fp_t x) { + return __compiler_rt_logbX(x); +} +#else +// The generic implementation only works for ieee754 floating point. For other +// floating point types, continue to rely on the libm implementation for now. +static __inline long double __compiler_rt_logbl(long double x) { + return crt_logbl(x); +} +#endif +#endif + +#endif // FP_LIB_HEADER diff --git a/src/libclang/fp_mode.h b/src/libclang/fp_mode.h new file mode 100644 index 0000000..4ba682c --- /dev/null +++ b/src/libclang/fp_mode.h @@ -0,0 +1,29 @@ +//===----- lib/fp_mode.h - Floaing-point environment mode utilities --C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines an interface for accessing hardware floating point +// environment mode. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_MODE +#define FP_MODE + +typedef enum { + FE_TONEAREST, + FE_DOWNWARD, + FE_UPWARD, + FE_TOWARDZERO +} FE_ROUND_MODE; + +FE_ROUND_MODE __fe_getround(void); +int __fe_raise_inexact(void); + +#endif // FP_MODE_H diff --git a/src/libclang/fp_mul_impl.inc b/src/libclang/fp_mul_impl.inc new file mode 100644 index 0000000..a93f2d7 --- /dev/null +++ b/src/libclang/fp_mul_impl.inc @@ -0,0 +1,128 @@ +//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float multiplication with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __mulXf3__(fp_t a, fp_t b) { + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN * anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything * NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity * non-zero = +/- infinity + if (bAbs) + return fromRep(aAbs | productSign); + // infinity * zero = NaN + else + return fromRep(qnanRep); + } + + if (bAbs == infRep) { + // non-zero * infinity = +/- infinity + if (aAbs) + return fromRep(bAbs | productSign); + // zero * infinity = NaN + else + return fromRep(qnanRep); + } + + // zero * anything = +/- zero + if (!aAbs) + return fromRep(productSign); + // anything * zero = +/- zero + if (!bAbs) + return fromRep(productSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale += normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + // Perform a basic multiplication on the significands. One of them must be + // shifted beforehand to be aligned with the exponent. + rep_t productHi, productLo; + wideMultiply(aSignificand, bSignificand << exponentBits, &productHi, + &productLo); + + int productExponent = aExponent + bExponent - exponentBias + scale; + + // Normalize the significand and adjust the exponent if needed. + if (productHi & implicitBit) + productExponent++; + else + wideLeftShift(&productHi, &productLo, 1); + + // If we have overflowed the type, return +/- infinity. + if (productExponent >= maxExponent) + return fromRep(infRep | productSign); + + if (productExponent <= 0) { + // The result is denormal before rounding. + // + // If the result is so small that it just underflows to zero, return + // zero with the appropriate sign. Mathematically, there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + const unsigned int shift = REP_C(1) - (unsigned int)productExponent; + if (shift >= typeWidth) + return fromRep(productSign); + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + wideRightShiftWithSticky(&productHi, &productLo, shift); + } else { + // The result is normal before rounding. Insert the exponent. + productHi &= significandMask; + productHi |= (rep_t)productExponent << significandBits; + } + + // Insert the sign of the result. + productHi |= productSign; + + // Perform the final rounding. The final result may overflow to infinity, + // or underflow to zero, but those are the correct results in those cases. + // We use the default IEEE-754 round-to-nearest, ties-to-even rounding mode. + if (productLo > signBit) + productHi++; + if (productLo == signBit) + productHi += productHi & 1; + return fromRep(productHi); +} diff --git a/src/libclang/int_endianness.h b/src/libclang/int_endianness.h new file mode 100644 index 0000000..def046c --- /dev/null +++ b/src/libclang/int_endianness.h @@ -0,0 +1,114 @@ +//===-- int_endianness.h - configuration header for compiler-rt -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a configuration header for compiler-rt. +// This file is not part of the interface of this library. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + defined(__ORDER_LITTLE_ENDIAN__) + +// Clang and GCC provide built-in endianness definitions. +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif // __BYTE_ORDER__ + +#else // Compilers other than Clang or GCC. + +#if defined(__SVR4) && defined(__sun) +#include + +#if defined(_BIG_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif defined(_LITTLE_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#else // !_LITTLE_ENDIAN +#error "unknown endianness" +#endif // !_LITTLE_ENDIAN + +#endif // Solaris and AuroraUX. + +// .. + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ + defined(__minix) +#include + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif // _BYTE_ORDER + +#endif // *BSD + +#if defined(__OpenBSD__) +#include + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif // _BYTE_ORDER + +#endif // OpenBSD + +// .. + +// Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the +// compiler (at least with GCC) +#if defined(__APPLE__) || defined(__ellcc__) + +#ifdef __BIG_ENDIAN__ +#if __BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#endif +#endif // __BIG_ENDIAN__ + +#ifdef __LITTLE_ENDIAN__ +#if __LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif +#endif // __LITTLE_ENDIAN__ + +#endif // Mac OSX + +// .. + +#if defined(_WIN32) + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif // Windows + +#endif // Clang or GCC. + +// . + +#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN) +#error Unable to determine endian +#endif // Check we found an endianness correctly. + +#endif // INT_ENDIANNESS_H diff --git a/src/libclang/int_lib.h b/src/libclang/int_lib.h new file mode 100644 index 0000000..1fedbf6 --- /dev/null +++ b/src/libclang/int_lib.h @@ -0,0 +1,149 @@ +//===-- int_lib.h - configuration header for compiler-rt -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a configuration header for compiler-rt. +// This file is not part of the interface of this library. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_LIB_H +#define INT_LIB_H + +// Assumption: Signed integral is 2's complement. +// Assumption: Right shift of signed negative is arithmetic shift. +// Assumption: Endianness is little or big (not mixed). + +// ABI macro definitions + +#if __ARM_EABI__ +#ifdef COMPILER_RT_ARMHF_TARGET +#define COMPILER_RT_ABI +#else +#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) +#endif +#else +#define COMPILER_RT_ABI +#endif + +#define AEABI_RTABI __attribute__((__pcs__("aapcs"))) + +#if defined(_MSC_VER) && !defined(__clang__) +#define ALWAYS_INLINE __forceinline +#define NOINLINE __declspec(noinline) +#define NORETURN __declspec(noreturn) +#define UNUSED +#else +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#endif + +#define STR(a) #a +#define XSTR(a) STR(a) +#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name + +#if defined(__ELF__) || defined(__MINGW32__) || defined(__wasm__) || \ + defined(_AIX) || defined(__mips__) +#define COMPILER_RT_ALIAS(name, aliasname) \ + COMPILER_RT_ABI __typeof(name) aliasname __attribute__((__alias__(#name))); +#elif defined(__APPLE__) +#if defined(VISIBILITY_HIDDEN) +#define COMPILER_RT_ALIAS_VISIBILITY(name) \ + __asm__(".private_extern " SYMBOL_NAME(name)); +#else +#define COMPILER_RT_ALIAS_VISIBILITY(name) +#endif +#define COMPILER_RT_ALIAS(name, aliasname) \ + __asm__(".globl " SYMBOL_NAME(aliasname)); \ + COMPILER_RT_ALIAS_VISIBILITY(aliasname) \ + __asm__(SYMBOL_NAME(aliasname) " = " SYMBOL_NAME(name)); \ + COMPILER_RT_ABI __typeof(name) aliasname; +#elif defined(_WIN32) +#define COMPILER_RT_ALIAS(name, aliasname) +#else +#error Unsupported target +#endif + +#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE)) +// +// Kernel and boot environment can't use normal headers, +// so use the equivalent system headers. +// +#include +#include +#include +#else +// Include the standard compiler builtin headers we use functionality from. +#include +#include +#include +#include +#endif + +// Include the commonly used internal type definitions. +#include "int_types.h" + +// Include internal utility function declarations. +#include "int_util.h" + +COMPILER_RT_ABI int __paritysi2(si_int a); +COMPILER_RT_ABI int __paritydi2(di_int a); + +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); + +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem); +#ifdef CRT_HAS_128BIT +COMPILER_RT_ABI int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem); +#endif + +// Definitions for builtins unavailable on MSVC +#if defined(_MSC_VER) && !defined(__clang__) +#include + +int __inline __builtin_ctz(uint32_t value) { + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, value)) + return trailing_zero; + return 32; +} + +int __inline __builtin_clz(uint32_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse(&leading_zero, value)) + return 31 - leading_zero; + return 32; +} + +#if defined(_M_ARM) || defined(_M_X64) +int __inline __builtin_clzll(uint64_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse64(&leading_zero, value)) + return 63 - leading_zero; + return 64; +} +#else +int __inline __builtin_clzll(uint64_t value) { + if (value == 0) + return 64; + uint32_t msh = (uint32_t)(value >> 32); + uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); + if (msh != 0) + return __builtin_clz(msh); + return 32 + __builtin_clz(lsh); +} +#endif + +#define __builtin_clzl __builtin_clzll +#endif // defined(_MSC_VER) && !defined(__clang__) + +#endif // INT_LIB_H diff --git a/src/libclang/int_math.h b/src/libclang/int_math.h new file mode 100644 index 0000000..58d8990 --- /dev/null +++ b/src/libclang/int_math.h @@ -0,0 +1,106 @@ +//===-- int_math.h - internal math inlines --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines substitutes for the libm functions used in some of the +// compiler-rt implementations, defined in such a way that there is not a direct +// dependency on libm or math.h. Instead, we use the compiler builtin versions +// where available. This reduces our dependencies on the system SDK by foisting +// the responsibility onto the compiler. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_MATH_H +#define INT_MATH_H + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#include +#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define CRT_INFINITY INFINITY +#else +#define CRT_INFINITY __builtin_huge_valf() +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_isfinite(x) _finite((x)) +#define crt_isinf(x) !_finite((x)) +#define crt_isnan(x) _isnan((x)) +#else +// Define crt_isfinite in terms of the builtin if available, otherwise provide +// an alternate version in terms of our other functions. This supports some +// versions of GCC which didn't have __builtin_isfinite. +#if __has_builtin(__builtin_isfinite) +#define crt_isfinite(x) __builtin_isfinite((x)) +#elif defined(__GNUC__) +#define crt_isfinite(x) \ + __extension__(({ \ + __typeof((x)) x_ = (x); \ + !crt_isinf(x_) && !crt_isnan(x_); \ + })) +#else +#error "Do not know how to check for infinity" +#endif // __has_builtin(__builtin_isfinite) +#define crt_isinf(x) __builtin_isinf((x)) +#define crt_isnan(x) __builtin_isnan((x)) +#endif // _MSC_VER + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_copysign(x, y) copysign((x), (y)) +#define crt_copysignf(x, y) copysignf((x), (y)) +#define crt_copysignl(x, y) copysignl((x), (y)) +#else +#define crt_copysign(x, y) __builtin_copysign((x), (y)) +#define crt_copysignf(x, y) __builtin_copysignf((x), (y)) +#define crt_copysignl(x, y) __builtin_copysignl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fabs(x) fabs((x)) +#define crt_fabsf(x) fabsf((x)) +#define crt_fabsl(x) fabs((x)) +#else +#define crt_fabs(x) __builtin_fabs((x)) +#define crt_fabsf(x) __builtin_fabsf((x)) +#define crt_fabsl(x) __builtin_fabsl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fmax(x, y) __max((x), (y)) +#define crt_fmaxf(x, y) __max((x), (y)) +#define crt_fmaxl(x, y) __max((x), (y)) +#else +#define crt_fmax(x, y) __builtin_fmax((x), (y)) +#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y)) +#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_logbl(x) logbl((x)) +#else +#define crt_logbl(x) __builtin_logbl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_scalbn(x, y) scalbn((x), (y)) +#define crt_scalbnf(x, y) scalbnf((x), (y)) +#define crt_scalbnl(x, y) scalbnl((x), (y)) +#else +#define crt_scalbn(x, y) __builtin_scalbn((x), (y)) +#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y)) +#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) +#endif + +#endif // INT_MATH_H diff --git a/src/libclang/int_types.h b/src/libclang/int_types.h new file mode 100644 index 0000000..705355a --- /dev/null +++ b/src/libclang/int_types.h @@ -0,0 +1,186 @@ +//===-- int_lib.h - configuration header for compiler-rt -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines various standard types, most importantly a number of unions +// used to access parts of larger types. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +// si_int is defined in Linux sysroot's asm-generic/siginfo.h +#ifdef si_int +#undef si_int +#endif +typedef int32_t si_int; +typedef uint32_t su_int; +#if UINT_MAX == 0xFFFFFFFF +#define clzsi __builtin_clz +#define ctzsi __builtin_ctz +#elif ULONG_MAX == 0xFFFFFFFF +#define clzsi __builtin_clzl +#define ctzsi __builtin_ctzl +#else +#error could not determine appropriate clzsi macro for this system +#endif + +typedef int64_t di_int; +typedef uint64_t du_int; + +typedef union { + di_int all; + struct { +#if _YUGA_LITTLE_ENDIAN + su_int low; + si_int high; +#else + si_int high; + su_int low; +#endif // _YUGA_LITTLE_ENDIAN + } s; +} dwords; + +typedef union { + du_int all; + struct { +#if _YUGA_LITTLE_ENDIAN + su_int low; + su_int high; +#else + su_int high; + su_int low; +#endif // _YUGA_LITTLE_ENDIAN + } s; +} udwords; + +#if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \ + defined(__riscv) || defined(_WIN64) +#define CRT_HAS_128BIT +#endif + +// MSVC doesn't have a working 128bit integer type. Users should really compile +// compiler-rt with clang, but if they happen to be doing a standalone build for +// asan or something else, disable the 128 bit parts so things sort of work. +#if defined(_MSC_VER) && !defined(__clang__) +#undef CRT_HAS_128BIT +#endif + +#ifdef CRT_HAS_128BIT +typedef int ti_int __attribute__((mode(TI))); +typedef unsigned tu_int __attribute__((mode(TI))); + +typedef union { + ti_int all; + struct { +#if _YUGA_LITTLE_ENDIAN + du_int low; + di_int high; +#else + di_int high; + du_int low; +#endif // _YUGA_LITTLE_ENDIAN + } s; +} twords; + +typedef union { + tu_int all; + struct { +#if _YUGA_LITTLE_ENDIAN + du_int low; + du_int high; +#else + du_int high; + du_int low; +#endif // _YUGA_LITTLE_ENDIAN + } s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +#endif // CRT_HAS_128BIT + +typedef union { + su_int u; + float f; +} float_bits; + +typedef union { + udwords u; + double f; +} double_bits; + +typedef struct { +#if _YUGA_LITTLE_ENDIAN + udwords low; + udwords high; +#else + udwords high; + udwords low; +#endif // _YUGA_LITTLE_ENDIAN +} uqwords; + +// Check if the target supports 80 bit extended precision long doubles. +// Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC +// still makes it 80 bits. Clang will match whatever compiler it is trying to +// be compatible with. On 32-bit x86 Android, long double is 64 bits, while on +// x86_64 Android, long double is 128 bits. +#if (defined(__i386__) || defined(__x86_64__)) && \ + !(defined(_MSC_VER) || defined(__ANDROID__)) +#define HAS_80_BIT_LONG_DOUBLE 1 +#elif defined(__m68k__) || defined(__ia64__) +#define HAS_80_BIT_LONG_DOUBLE 1 +#else +#define HAS_80_BIT_LONG_DOUBLE 0 +#endif + +typedef union { + uqwords u; + long double f; +} long_double_bits; + +#if __STDC_VERSION__ >= 199901L +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; + +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) +#else +typedef struct { + float real, imaginary; +} Fcomplex; + +typedef struct { + double real, imaginary; +} Dcomplex; + +typedef struct { + long double real, imaginary; +} Lcomplex; + +#define COMPLEX_REAL(x) (x).real +#define COMPLEX_IMAGINARY(x) (x).imaginary +#endif +#endif // INT_TYPES_H diff --git a/src/libclang/int_util.h b/src/libclang/int_util.h new file mode 100644 index 0000000..c372c2e --- /dev/null +++ b/src/libclang/int_util.h @@ -0,0 +1,47 @@ +//===-- int_util.h - internal utility functions ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines non-inline utilities which are available for use in the +// library. The function definitions themselves are all contained in int_util.c +// which will always be compiled into any compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_UTIL_H +#define INT_UTIL_H + +/// \brief Trigger a program abort (or panic for kernel code). +#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__) + +NORETURN void __compilerrt_abort_impl(const char *file, int line, + const char *function); + +#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) +#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) +#define COMPILE_TIME_ASSERT2(expr, cnt) \ + typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED + +// Force unrolling the code specified to be repeated N times. +#define REPEAT_0_TIMES(code_to_repeat) /* do nothing */ +#define REPEAT_1_TIMES(code_to_repeat) code_to_repeat +#define REPEAT_2_TIMES(code_to_repeat) \ + REPEAT_1_TIMES(code_to_repeat) \ + code_to_repeat +#define REPEAT_3_TIMES(code_to_repeat) \ + REPEAT_2_TIMES(code_to_repeat) \ + code_to_repeat +#define REPEAT_4_TIMES(code_to_repeat) \ + REPEAT_3_TIMES(code_to_repeat) \ + code_to_repeat + +#define REPEAT_N_TIMES_(N, code_to_repeat) REPEAT_##N##_TIMES(code_to_repeat) +#define REPEAT_N_TIMES(N, code_to_repeat) REPEAT_N_TIMES_(N, code_to_repeat) + +#endif // INT_UTIL_H diff --git a/src/libclang/muldf3.c b/src/libclang/muldf3.c new file mode 100644 index 0000000..f64b522 --- /dev/null +++ b/src/libclang/muldf3.c @@ -0,0 +1,25 @@ +//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) { return __muldf3(a, b); } +#else +COMPILER_RT_ALIAS(__muldf3, __aeabi_dmul) +#endif +#endif diff --git a/src/libclang/mulsf3.c b/src/libclang/mulsf3.c new file mode 100644 index 0000000..b9cf39a --- /dev/null +++ b/src/libclang/mulsf3.c @@ -0,0 +1,25 @@ +//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) { return __mulsf3(a, b); } +#else +COMPILER_RT_ALIAS(__mulsf3, __aeabi_fmul) +#endif +#endif diff --git a/src/libclang/subsf3.c b/src/libclang/subsf3.c new file mode 100644 index 0000000..ecfc24f --- /dev/null +++ b/src/libclang/subsf3.c @@ -0,0 +1,27 @@ +//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float subtraction. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) { + return __addsf3(a, fromRep(toRep(b) ^ signBit)); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) { return __subsf3(a, b); } +#else +COMPILER_RT_ALIAS(__subsf3, __aeabi_fsub) +#endif +#endif diff --git a/target.mk b/target.mk index 9b51b0e..47164c5 100644 --- a/target.mk +++ b/target.mk @@ -60,7 +60,7 @@ ELF2AOUT = $(TOPSRC)/tools/elf2aout/elf2aout CFLAGS = -Os -nostdinc LDFLAGS = -T$(TOPSRC)/src/elf32-mips.ld $(TOPSRC)/src/crt0.o -L$(TOPSRC)/src -LIBS = -lc +LIBS = -lc -lclang # Enable mips16e instruction set by default #CFLAGS += -mips16