Merge pull request #69 from alexfru/master

Floats in Smaller C!
This commit is contained in:
Serge Vakulenko
2016-01-16 23:35:14 -08:00
5 changed files with 2093 additions and 455 deletions

View File

@@ -32,5 +32,5 @@ install: smlrc
clean:
rm -f *.o smlrc smlrc.dis smlrc.elf
###
smlrc.o: smlrc.c cgmips.c
smlrc.o: smlrc.c fp.c cgmips.c
${CC} ${CFLAGS} -mips16 smlrc.c -c -o $@

View File

@@ -546,6 +546,11 @@ void GenFxnProlog(void)
int i, cnt = CurFxnParamCntMax;
if (cnt > 4)
cnt = 4;
// TBD!!! for structure passing use the cumulative parameter size
// instead of the number of parameters. Currently this bug is masked
// by the subroutine that pushes structures on the stack (it copies
// all words except the first to the stack). But passing structures
// in registers from assembly code won't always work.
for (i = 0; i < cnt; i++)
GenPrintInstr2Operands(MipsInstrSW, 0,
MipsOpRegA0 + i, 0,

323
src/cmd/smlrc/fp.c Normal file
View File

@@ -0,0 +1,323 @@
/*
Copyright (c) 2016, Alexey Frunze
2-clause BSD license.
*/
// This file implements the basic arithmetic functions on floats and
// expects that the host representation of floats coinsides with the
// target representation of floats and is IEEE 754 single precision.
// It is also expected that the host implementation does not raise
// any signals nor results in undefined behavior when handling
// special cases involving infinities, NaNs and denormal numbers.
// Make sure float and int are the same size, so they can be
// copied one into another byte by byte with memcpy().
extern char StAtIcAsSeRt[(sizeof(float) == sizeof(unsigned)) ? 1 : -1];
// There can be up to 112 significant digits in a 32-bit single precision
// floating point number (in the maximum positive denormal number).
// We'll ignore any following digits, but convert these 112 with proper
// rounding to even, which is enough for all floats representable exactly,
// which should be enough for most practical purposes.
#define MAX_CONST_DIGITS 112 // must be big enough for decimal/octal ints as well
unsigned char ConstDigits[MAX_CONST_DIGITS];
#define FP_MANT_BITS 23 // bits in mantissa, excluding the implicit 1
#define FP_EXP_BITS 8 // bits in exponent
#define FP_MIN_EXP (-46) // from min denormal, ~1.40e-45
// (-46 is chosen because we need to handle numbers
// like 9e-46 which round up to a non-zero denormal
// number ~1.40e-45)
#define FP_MAX_EXP 38 // from max normal, ~3.40e+38
#define FP_BUF_SIZE (((((MAX_CONST_DIGITS-FP_MIN_EXP)*10+2)/3+FP_MANT_BITS+2)+7)/8)
unsigned char ConstBinDigits[FP_BUF_SIZE];
STATIC
unsigned f2u(unsigned f)
{
float ff;
memcpy(&ff, &f, sizeof ff);
if (ff != ff || ff <= -1.0f || ff >= (float)0x10000*0x10000/* 2**32 */)
{
warnFloat2Int();
ff = 0;
}
return ff;
}
STATIC
unsigned f2i(unsigned f, int opsz)
{
float ff;
int ovf = 0;
memcpy(&ff, &f, sizeof ff);
if (ff != ff)
{
ovf = 1;
}
else
{
switch (opsz)
{
case 1: // unsigned char
ovf = ff <= -1.0f || ff >= 256.0f;
break;
case 2: // unsigned short
ovf = ff <= -1.0f || ff >= 65536.0f;
break;
case -1: // signed char
ovf = ff <= -129.0f || ff >= 128.0f;
break;
case -2: // signed short
ovf = ff <= -32769.0f || ff >= 32768.0f;
break;
default: // signed int
ovf = ff < -(float)0x8000*0x10000/* -2**31 */ || ff >= (float)0x8000*0x10000/* 2**31 */;
break;
}
}
if (ovf)
{
warnFloat2Int();
ff = 0;
}
return (int)ff;
}
STATIC
unsigned u2f(unsigned i)
{
float f = i;
memcpy(&i, &f, sizeof i);
return i;
}
STATIC
unsigned i2f(unsigned i)
{
float f = (int)i;
memcpy(&i, &f, sizeof i);
return i;
}
STATIC
unsigned fneg(unsigned f)
{
float ff;
memcpy(&ff, &f, sizeof ff);
ff = -ff;
memcpy(&f, &ff, sizeof f);
return f;
}
STATIC
unsigned fadd(unsigned fa, unsigned fb)
{
float ffa, ffb;
memcpy(&ffa, &fa, sizeof ffa);
memcpy(&ffb, &fb, sizeof ffb);
ffa = ffa + ffb;
memcpy(&fa, &ffa, sizeof fa);
return fa;
}
STATIC
unsigned fsub(unsigned fa, unsigned fb)
{
return fadd(fa, fneg(fb));
}
STATIC
unsigned fmul(unsigned fa, unsigned fb)
{
float ffa, ffb;
memcpy(&ffa, &fa, sizeof ffa);
memcpy(&ffb, &fb, sizeof ffb);
ffa = ffa * ffb;
memcpy(&fa, &ffa, sizeof fa);
return fa;
}
STATIC
unsigned fdiv(unsigned fa, unsigned fb)
{
float ffa, ffb;
memcpy(&ffa, &fa, sizeof ffa);
memcpy(&ffb, &fb, sizeof ffb);
ffa = ffa / ffb;
memcpy(&fa, &ffa, sizeof fa);
return fa;
}
STATIC
int fcmp(unsigned fa, unsigned fb, int nanRes)
{
float ffa, ffb;
memcpy(&ffa, &fa, sizeof ffa);
memcpy(&ffb, &fb, sizeof ffb);
if (ffa != ffa || ffb != ffb)
return nanRes;
if (ffa < ffb)
return -1;
if (ffa > ffb)
return +1;
return 0;
}
STATIC
void ChainMultiplyAdd(unsigned char* pChain,
unsigned ChainLen,
unsigned char Multiplier,
unsigned char Addend)
{
unsigned carry = Addend;
while (ChainLen--)
{
carry += *pChain * Multiplier;
*pChain++ = carry & 0xFFu;
carry >>= 8;
}
}
STATIC
void ChainDivide(unsigned char* pChain,
unsigned ChainLen,
unsigned char Divisor,
unsigned char* pRemainder)
{
unsigned remainder = 0;
while (ChainLen)
{
remainder += pChain[ChainLen - 1];
pChain[ChainLen - 1] = remainder / Divisor;
remainder = (remainder % Divisor) << 8;
ChainLen--;
}
if (pRemainder != NULL)
*pRemainder = remainder >> 8;
}
// Multiplies an integer (cnt decimal digits (0 to 9) from digits[]) by
// 10**eexp, converts the product to a float and returns it as unsigned int.
// This is an inefficient but straightforward algorithm with proper rounding.
STATIC
unsigned d2f(unsigned char* digits, int cnt, int eexp)
{
unsigned numDecDigits;
unsigned denDecDigits;
unsigned numBinDigits;
unsigned numBytes;
int tmp;
unsigned char remainder;
int binExp = 0;
int inexact = 0;
int lastInexact = 0;
unsigned res;
// 0?
if (cnt == 1 && *digits == 0)
return 0;
// less than the denormalized minimum?
if (eexp < FP_MIN_EXP - (cnt - 1))
return 0;
// greater than the normalized maximum?
if (eexp > FP_MAX_EXP - (cnt - 1))
return 0x7F800000; // +INF
numDecDigits = cnt + ((eexp >= 0) ? eexp : 0);
denDecDigits = 1 + ((eexp < 0) ? -eexp : 0);
// 10/3=3.3(3) > log2(10)~=3.32
if (eexp >= 0)
{
unsigned t1 = (numDecDigits * 10 + 2) / 3;
unsigned t2 = FP_MANT_BITS + 1;
numBinDigits = (t1 >= t2) ? t1 : t2;
}
else
{
unsigned t1 = (numDecDigits * 10 + 2) / 3;
unsigned t2 = (denDecDigits * 10 + 2) / 3 + FP_MANT_BITS + 1 + 1;
numBinDigits = (t1 >= t2) ? t1 : t2;
}
numBytes = (numBinDigits + 7) / 8;
if (numBytes > (unsigned)FP_BUF_SIZE)
errorInternal(200);
memset(ConstBinDigits, 0, numBytes);
// Convert the numerator to binary
for (tmp = 0; tmp < cnt; tmp++)
ChainMultiplyAdd(ConstBinDigits, numBytes, 10, ConstDigits[tmp]);
for (tmp = eexp; tmp > 0; tmp--)
ChainMultiplyAdd(ConstBinDigits, numBytes, 10, 0);
// If the denominator isn't 1, divide the numerator by the denominator
// getting at least FractionBitCnt+2 significant bits of quotient
if (eexp < 0)
{
binExp = -(int)(numBinDigits - (numDecDigits * 10 + 2) / 3);
for (tmp = binExp; tmp < 0; tmp++)
ChainMultiplyAdd(ConstBinDigits, numBytes, 2, 0);
for (tmp = eexp; tmp < 0; tmp++)
ChainDivide(ConstBinDigits, numBytes, 10, &remainder),
lastInexact = inexact, inexact |= !!remainder;
}
// Find the most significant bit and normalize the mantissa
// by shifting it left
for (tmp = numBytes - 1; tmp >= 0 && !ConstBinDigits[tmp]; tmp--);
if (tmp >= 0)
{
tmp = tmp * 8 + 7;
while (!(ConstBinDigits[tmp / 8] & (1 << tmp % 8))) tmp--;
while (tmp < FP_MANT_BITS)
ChainMultiplyAdd(ConstBinDigits, numBytes, 2, 0), binExp--, tmp++;
}
// Find the most significant bit and normalize the mantissa
// by shifting it right
do
{
remainder = 0;
for (tmp = numBytes - 1; tmp >= 0 && !ConstBinDigits[tmp]; tmp--);
if (tmp >= 0)
{
tmp = tmp * 8 + 7;
while (!(ConstBinDigits[tmp / 8] & (1 << tmp % 8))) tmp--;
while (tmp > FP_MANT_BITS)
ChainDivide(ConstBinDigits, numBytes, 2, &remainder),
lastInexact = inexact, inexact |= !!remainder, binExp++, tmp--;
while (binExp < 2 - (1 << (FP_EXP_BITS - 1)) - FP_MANT_BITS)
ChainDivide(ConstBinDigits, numBytes, 2, &remainder),
lastInexact = inexact, inexact |= !!remainder, binExp++;
}
// Round to nearest even
remainder &= (lastInexact | (ConstBinDigits[0] & 1));
if (remainder)
ChainMultiplyAdd(ConstBinDigits, numBytes, 1, 1);
} while (remainder);
// Collect the result's mantissa
res = 0;
while (tmp >= 0)
{
res <<= 8;
res |= ConstBinDigits[tmp / 8];
tmp -= 8;
}
// Collect the result's exponent
binExp += (1 << (FP_EXP_BITS - 1)) - 1 + FP_MANT_BITS;
if (!(res & (1u << FP_MANT_BITS))) binExp = 0; // subnormal or 0
res &= ~(1u << FP_MANT_BITS);
if (binExp >= (1 << FP_EXP_BITS) - 1)
binExp = (1 << FP_EXP_BITS) - 1, res = 0, inexact |= 1; // +INF
res |= (unsigned)binExp << FP_MANT_BITS;
return res;
}

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2013-2015, Alexey Frunze
Copyright (c) 2013-2016, Alexey Frunze
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern int main(int argc, char** argv);
void exit(int);
#ifdef __SMALLER_C_32__
extern void __x87init(void);
#endif
#ifdef _RETROBSD
void __start__(int argc, char** argv)
{
@@ -75,6 +79,7 @@ void __start__(int argc, char** argv)
#ifdef _LINUX
void __start__(int argc, char** argv)
{
__x87init();
exit(main(argc, argv));
}
#else
@@ -85,6 +90,9 @@ void __start__(void)
int argc;
char** argv;
__setargs__(&argc, &argv);
#ifdef __SMALLER_C_32__
__x87init();
#endif
exit(main(argc, argv));
}
#endif
@@ -371,6 +379,22 @@ void* memset(void* s, int c, unsigned n)
return s;
}
#ifdef __SMALLER_C_32__
int memcmp(void* s1, void* s2, unsigned n)
{
if (n)
{
unsigned char *p1 = s1, *p2 = s2;
do
{
if (*p1++ != *p2++)
return (*--p1 - *--p2);
} while (--n);
}
return 0;
}
#endif
int fputc(int c, FILE* stream);
int putchar(int c);
@@ -1548,4 +1572,187 @@ int fsetpos(FILE* stream, fpos_t* pos)
return 0;
}
#ifdef __SMALLER_C_32__
#ifndef _RETROBSD
#ifdef __HUGE__
#define xbp "bp"
#define xsp "sp"
#else
#define xbp "ebp"
#define xsp "esp"
#endif
void __x87init(void)
{
// Mask all exceptions, set rounding to nearest even and precision to 64 bits.
// TBD??? Actually check for x87???
asm("fninit");
}
float __floatsisf(int i)
{
asm
(
"fild dword ["xbp"+8]\n"
"fstp dword ["xbp"+8]\n"
"mov eax, ["xbp"+8]"
);
}
float __floatunsisf(unsigned i)
{
asm
(
"push dword 0\n"
"push dword ["xbp"+8]\n"
"fild qword ["xbp"-8]\n" // load 32-bit unsigned int as 64-bit signed int
"add "xsp", 8\n"
"fstp dword ["xbp"+8]\n"
"mov eax, ["xbp"+8]"
);
}
int __fixsfsi(float f)
{
asm
(
"sub "xsp", 4\n"
"fnstcw ["xbp"-2]\n" // save rounding
"mov ax, ["xbp"-2]\n"
"mov ah, 0x0c\n" // rounding towards 0 (AKA truncate) since we don't have fisttp
"mov ["xbp"-4], ax\n"
"fld dword ["xbp"+8]\n"
"fldcw ["xbp"-4]\n" // rounding = truncation
"fistp dword ["xbp"+8]\n"
"fldcw ["xbp"-2]\n" // restore rounding
"add "xsp", 4\n"
"mov eax, ["xbp"+8]"
);
}
unsigned __fixunssfsi(float f)
{
asm
(
"sub "xsp", 12\n"
"fnstcw ["xbp"-2]\n" // save rounding
"mov ax, ["xbp"-2]\n"
"mov ah, 0x0c\n" // rounding towards 0 (AKA truncate) since we don't have fisttp
"mov ["xbp"-4], ax\n"
"fld dword ["xbp"+8]\n"
"fldcw ["xbp"-4]\n" // rounding = truncation
"fistp qword ["xbp"-12]\n" // store 64-bit signed int
"fldcw ["xbp"-2]\n" // restore rounding
"mov eax, ["xbp"-12]\n" // take low 32 bits
"add "xsp", 12"
);
}
float __addsf3(float a, float b)
{
asm
(
"fld dword ["xbp"+8]\n"
"fadd dword ["xbp"+12]\n"
"fstp dword ["xbp"+8]\n"
"mov eax, ["xbp"+8]"
);
}
float __subsf3(float a, float b)
{
asm
(
"fld dword ["xbp"+8]\n"
"fsub dword ["xbp"+12]\n"
"fstp dword ["xbp"+8]\n"
"mov eax, ["xbp"+8]"
);
}
float __negsf2(float f)
{
asm
(
"mov eax, ["xbp"+8]\n"
"xor eax, 0x80000000"
);
}
float __mulsf3(float a, float b)
{
asm
(
"fld dword ["xbp"+8]\n"
"fmul dword ["xbp"+12]\n"
"fstp dword ["xbp"+8]\n"
"mov eax, ["xbp"+8]"
);
}
float __divsf3(float a, float b)
{
asm
(
"fld dword ["xbp"+8]\n"
"fdiv dword ["xbp"+12]\n"
"fstp dword ["xbp"+8]\n"
"mov eax, ["xbp"+8]"
);
}
float __lesf2(float a, float b)
{
asm
(
"fld dword ["xbp"+12]\n"
"fld dword ["xbp"+8]\n"
"fucompp\n"
"fstsw ax\n" // must use these moves since we don't have fucomip
"sahf\n"
"jnp .ordered\n"
"mov eax, +1\n" // return +1 if either a or b (or both) is a NaN
"jmp .done\n"
".ordered:\n"
"jnz .unequal\n"
"xor eax, eax\n" // return 0 if a == b
"jmp .done\n"
".unequal:\n"
"sbb eax, eax\n"
"jc .done\n" // return -1 if a < b
"inc eax\n" // return +1 if a > b
".done:"
);
}
float __gesf2(float a, float b)
{
asm
(
"fld dword ["xbp"+12]\n"
"fld dword ["xbp"+8]\n"
"fucompp\n"
"fstsw ax\n" // must use these moves since we don't have fucomip
"sahf\n"
"jnp .ordered\n"
"mov eax, -1\n" // return -1 if either a or b (or both) is a NaN
"jmp .done\n"
".ordered:\n"
"jnz .unequal\n"
"xor eax, eax\n" // return 0 if a == b
"jmp .done\n"
".unequal:\n"
"sbb eax, eax\n"
"jc .done\n" // return -1 if a < b
"inc eax\n" // return +1 if a > b
".done:"
);
}
#endif
#endif
#endif /*__APPLE__*/

File diff suppressed because it is too large Load Diff