Files
ldc/backend/cg87.c
Alexey Prokhin f2237662df Squashed 'dmd2/' changes from 10017d5..3443f38
3443f38 Fix issue 7493  Initialization of void[][N]
0b371da foreach can run semantic again
7216e2a fix Issue 7735 - Functions with variadic void[][]... arguments corrupt passed data
4fb2b2a Merge pull request #850 from 9rnsr/fix7773
9c59931 Merge pull request #851 from donc/ctfe7785pointerToVar
407f7e4 Merge pull request #852 from donc/segfault7639
9370f83 Fix issue 7380 Crash trying to use address of variable in struct constructor at module level
240866b Fix issue 7639 Undefined enum AA key crashes compiler
19b7096 Fix issue 7785 [CTFE] ICE when slicing pointer to variable
d9b11f6 fix Issue 7773 - UCFS syntax on built-in attributes too?
296d812 Merge pull request #846 from donc/ctfe7781segfault
65aca2d Merge pull request #848 from donc/regression7751
5576737 Merge pull request #849 from donc/bug7794
0310838 Merge pull request #828 from 9rnsr/fix7751
4027e4f Fix issue 7794 Sea of errors when calling regex() after compile error
59cc12d Fix issue 7781 [CTFE] Segmentation fault on 'mixin({return;}());'
3430947 fix seg fault in fail91.d
948274e Merge pull request #824 from donc/regression7745
22ac4b1 Merge pull request #826 from 9rnsr/fix6659
1c15841 Merge pull request #823 from redstar/mscclean
5f54752 Merge pull request #827 from 9rnsr/fix7694
399e4a3 Merge pull request #844 from donc/regression7782
516f49b Fix issue 7789 [CTFE] null pointer exception on setting array length
d74b354 Fix issue 7782 Regression: ICE with wrong import syntax
0269194 Fix issue 7751 [ICE] (Regression 2.059head) From auto and forward reference
42ad236 Merge pull request #830 from 9rnsr/fix_ufcs
67bf025 Merge pull request #832 from 9rnsr/fix7608
d13f107 Merge pull request #829 from 9rnsr/fix7754
e25cbe2 Merge pull request #834 from 9rnsr/fix2367
7fac235 merge D2 pull #842
c836773 Merge pull request #836 from 9rnsr/fix7757
a2754c5 Merge pull request #839 from 9rnsr/fix7768
4948836 fix Issue 7694 - Internal error: e2ir.c 1251 when calling member function inside struct via alias param
9f23335 Merge pull request #838 from 9rnsr/fix7621
92eba60 Merge pull request #840 from 9rnsr/fix7769
8fae3c2 fix issue 7742 - 'More initializers than fields' error with correct number of fields
6c2d706 to enum
35e4f08 fix Issue 7769 - relax inout rule doesn't work for template function
96a0105 fix Issue 7768 - More readable template error messages
8012d58 Merge pull request #831 from 9rnsr/fix7743
9c0cbdd fix Issue 7621 - Immutable type equivalence problem
f67f313 Merge pull request #833 from 9rnsr/fix7731
29754dd Merge pull request #837 from braddr/cleanup-backend2
374109a restore original binary() function and re-fix the new version
78c04aa fix Issue 7757 - Inout function with lazy inout parameter doesn't compile
50c34e9 fix Issue 7754 - static this() in template is stripped during header gen
11acdff Fix auto tester breaking.
f0b7157 fix Issue 7755 - regression(2.059head): ICE in glue.c
cfceb77 fix Issue 7751 - [ICE] From auto and forward reference
7a86807 fix Issue 2367 - Overloading error with string literals
6039c40 fix Issue 7731 - Assertion failure: 't' on line 7911 in file 'mtype.c'
aea3a39 fix Issue 7608 - __traits(allMembers) is broken
f46f07a fix Issue 7743 - Parsing problem with nothrow delegate
fa9d29f Revert "Revert "Refactor for UFCS property getter/setter resolution.""
d9698d8 Revert "Revert "fix Issue 7722 - Refuse normal functions to be used as properties""
0fbc772 Revert "Revert "Allow property function has two arguments""
07a3b09 fix Issue 6659 - Destructor in range foreach called after initialization
e499d4d Fix issue 7745 Regression(2.059beta) Methods defined in external object files when a pointer to it is taken
79a74e1 Fixes an unknown pragma warning.
2b12052 Fix issue 176 [module] message "module and package have the same name"
90e89a4 Merge pull request #814 from 9rnsr/fix7713
3ab0e79 Merge pull request #818 from donc/assoc7732
b3360e9 Fix issue 7732 [CTFE] wrong code for a struct called AssociativeArray
05f0b08 Merge pull request #779 from 9rnsr/fix7534
867e567 Revert "Allow property function has two arguments"
9171aeb Revert "fix Issue 7722 - Refuse normal functions to be used as properties"
989ced7 Revert "Refactor for UFCS property getter/setter resolution."
e9b5292 Refactor for UFCS property getter/setter resolution.
761d000 fix Issue 7722 - Refuse normal functions to be used as properties
9f5956b Allow property function has two arguments
1a11862 Revert "Allow property function has two arguments"
32f57e5 Revert "fix Issue 7722 - Refuse normal functions to be used as properties"
6489bb4 Revert "Refactor for UFCS property getter/setter resolution."
214296f Merge pull request #817 from 9rnsr/fix_ufcs
c3c7f2a Merge pull request #816 from donc/voidctfe6438
185d031 Refactor for UFCS property getter/setter resolution.
08bf89d fix Issue 7722 - Refuse normal functions to be used as properties
f0e3433 Allow property function has two arguments
1b67ac9 Direct check by Type::reliesOnTident
a3cd7d9 fix Issue 7713 - lambda inference doesn't work on template function argument
1762112 Fix issue 6438 - [CTFE] wrong error "value used before set" when slicing =void array
ace1eca fix complex constant folding
76f9b22 Consider return type covariance.
f700dbc fix Issue 7534 - Allow attribute-overloading of an overridden method
cba8f5c Merge pull request #763 from 9rnsr/fix7578
392d93f Merge pull request #815 from dawgfoto/fixSegFault
e48aba2 merge part of pull #769
d72a17e revert dd5a543
24d860b error(Loc loc,) doesn't abort program
4c79117 Use correct opcodes for moving cfloat from st->xmm and xmm->st
af875ff Merge pull request #785 from braddr/cleanup-backend2
9d3021a remove debugging printfs
b3df5ee Merge pull request #807 from dawgfoto/fix7698
f005537 Merge pull request #802 from dawgfoto/fixVC
65a145d Merge pull request #803 from donc/ctfeunion6681yebblies
1cf39ca Merge pull request #812 from 9rnsr/fix_ufcs
d846c3c Merge pull request #808 from 9rnsr/fix7702
fd0a492 fix Issue 7670 - UFCS problem with @property and structs
1ad35b2 Fix for UFCS with property syntax, and add exhaustive test
96f15a1 Resolve broken build after merging
4712aab fix regression
4e05482 Merge pull request #805 from donc/regression7681
245a107 dt_ functions aren't x86 specific
b35f43a another missing loc in an error() call
001addb minor cleanups
2fb1e46 make util_assert take a const string
907da39 cleanup whitespace in binary(), add binary() that takes the length of the string to search for
59d0425 Merge pull request #804 from braddr/nearsighted
d725eed Merge pull request #806 from donc/ctfe7633equalmsg
12a5c26 Merge pull request #811 from donc/bug7699
4279d5e revert the revert
c895c3b revert pull #809
865fb20 fix Issue 5733 - Calling opDispatch As Template Results in Compiler Infinite Loop
96e16d3 fix Issue 7702 - opDispatch goes into infinite loop
5e343c0 Remove special case for DotIdExp and opDispatch semantic, it isn't need anymore
1a9d607 Fix issue 7699 - Cannot get frame pointer to in contract when compiling with -inline
d1476eb Merge pull request #809 from 9rnsr/fix_funclit
afc7c60 allow out-of-order semantic analysis of fields
17da3a0 fix Issue 7705 - lambda syntax doesn't allow some valid signatures
e29d06d fix issue 7698
911d053 Fix issue 7633 - Missing CTFE error message
3802dde Fix issue 7681 Regression(2.059head):ICE:opCatAssign(delegate) to undefined identifier
8da4121 near-ectomy
cd6dc83 fix Library::error()s format string to take a const char*
f3f03c6 switch to apply()
faf873a fix Issue 3510 - Cannot forward reference a templated type from within a template mixin
23aa2be fix Issue 3509 - Cannot forward reference a template mixin's members in a compile-time context
e81309b Add missing 'loc' to error message.
b6898e3 Fix issue 6681 - struct constructor call is converted to struct literal that breaks union initialization
b79afba long double => longdouble
e48c319 Merge pull request #742 from yebblies/issue5879
d74485a Merge pull request #787 from eco/ddoc-srcfilename
3038cb9 Merge pull request #795 from dawgfoto/fixComment
89a039a Merge pull request #801 from dawgfoto/fix4507
c17c2d8 fix issue 4507
dd86c72 Merge pull request #796 from dawgfoto/fixVC
a516588 Merge pull request #797 from 9rnsr/fix7682
1b9839a Merge pull request #799 from 9rnsr/fix6982
4596774 Merge pull request #800 from 9rnsr/fix_type_deduction
b68d546 forgot about @system
bfe1083 add attributes to toHash
8f819d6 Stop special case in mutableOf/makeMutable with inout type.
319b1a3 Fix the lacks of type merging in Type::mutableOf() and uhSharedOf()
cfe7450 fix Issue 7671 - Broken inout deduction of shared(inout(T[n])) from immutable(int[3])
aca5c37 Stop too eager call of TypeAArray::getImpl() When implicitConvTo(non aa Tstruct => Taarray)
50b2a97 fix Issue 6982 - immutability isn't respected on associative array assignment
a5daa5e fix Issue 7684 - IFTI and shared overload doesn't work
e43fbac fix Issue 7682 - shared array type and "cast() is not an lvalue" error
8191801 cpp_prettyident only needed for C++
4487f75 fix ldval
525647c tparam is the specialization
f893925 fix issue 7592 d847c1c2dd
108b25d Merge pull request #780 from 9rnsr/fix7641
105a51f Merge pull request #784 from 9rnsr/fix7110
8b5b67f Merge pull request #792 from donc/bug7667
f72f237 fix Issue 3682 - Regression(2.038) is expression fails to match types
436b711 Fix issue 7667. ICE(interpret.c): 'ctfeStack.stackPointer() == 0'
9005276 Merge pull request #679 from yebblies/issue783
350a3ce Merge pull request #582 from 9rnsr/fix3382_ufcs
5f020c3 Merge pull request #788 from braddr/cleanup-backend3
6aa91cf Merge pull request #790 from p0nce/master
351d595 remove tls bracketing
a137d72 Fix bug #6391
6ce219c remove some of the bracketing
aec4c13 fix Issue 7578 - ICE on indexing result of vararg opDispatch
95e3dc1 Fix unintended infinite loop in Phobos build
b66196a fix Issue 3382 - [tdpl] Implement uniform function call syntax
ee2fe6c Fix 977 is with counting end-of-lines towards msot advanced lexer peeking
7790b16 fix Issue 7650 - Bad lambda inference in associative array literal
c03484e fix Issue 7649 - Bad lambda inference in default function argument
f293a10 fix Issue 7499 - [ICE] ('cast.c line 1495) with lambda array
9f0622c Expression::inferType() and remove FuncExp::setType()
cfc67b7 refactor lambda inference process
6d49586 more de-TX86'ing in relation to a bunch of OP codes
2efbf6a TX86-ectomy in evalu8.c
953f6d7 rip TX86 conditionals out of el.c
d5663c7 fix Issue 7595 - Data being overwritten.
449c165 Add predefined Ddoc macro SRCFILENAME
5c5da66 fix uninitialized field
29cde54 Merge pull request #783 from 9rnsr/fix7038
06d65ab fix Issue 7038 - Type mismatch with const struct
b77e2c9 fix Issue 7110 - opSlice() & opIndex functions works unstable as template arguments
a65f02f Merge pull request #781 from braddr/fix
08d6cd5 Merge pull request #782 from braddr/fixiasm
2492332 fix latent bug with Lexer::peek and recently introduced bug in Lexer::scan
ec1888e initialize popndTmp rather than rely on carefulness when usNumops == 0 and emitting a vector instruction, popndTmp is left uninitialized and is later dereferenced.
1d4a742 Merge pull request #766 from 9rnsr/fix7563
e1cd535 refactor
90f8dcf fix Issue 7641 - std.typecons.Proxy incorrectly allows implicit conversion to class
83a93cf Merge pull request #778 from dawgfoto/MoreSpellCorrection
7f0bcde 2nd go at fix issue 5590
567d7df fix Issue 5590 - Regression(2.036) ICE(e2ir.c): when using .values on enum which is associative array
48ea951 fix Issue 4820 - Regression(1.058, 2.044) in DStress caused by changeset 452
e8f9f3b more spell correction
afd9a45 fix Issue 7618 - delegate/function pointer call bypass parameter storage class
dabcdfb Merge pull request #773 from 9rnsr/fix7583
9846bb2 Merge pull request #774 from donc/ctfe7568
8c20445 Merge pull request #775 from donc/_error6785
d41e58e Avoiding shallow copy is more better.
cccef09 Revert "fix Issue 7585 - functions in templates inferred as delegate"
fc8dfc0 6785 Wrong error message from pragma(msg) of failed instantiation
61ec04d 7568 pragma(msg) segfaults with an aggregate including a class.
4d86d39 Merge pull request #767 from 9rnsr/fix7585
207d351 fix Issue 7583 - [CTFE] ICE with tuple and alias this
53bafa2 fix Issue 7411 - Deduce base type from vector types in templates
5ab1bd9 fix Issue 7518 - std.array.empty doesn't work for shared arrays
a1030d3 fix Issue 7554 - Immutable function pointer arguments too
5e96900 Merge pull request #771 from donc/bug7589
2287ebc fix Issue 7547 - -deps output lists object as a top level module
e611781 7589 __traits(compiles) does not work with a template that fails to compile
0113cde fix Issue 7585 - functions in templates inferred as delegate
4b978d5 fix Issue 7563 - Class members with default template arguments have no type
4d68981 fix Issue 7500 - [ICE] (template.c line 5287) with immutable lambda function
1a39c3c missed a line
6dd89ca Merge pull request #765 from 9rnsr/fix7525
8d6dcac fix Issue 7502 - 2.056 regression: Assigning .init takes forever to compile for large structs
042096e fix Issue 7525 - Broken return type inference for delegate returns
c5affa5 fix Issue 7582 - Untyped nested delegate literals don't compile
121677c fix Issue 7580 - Identity assignment of Nullable crashes dmd
adc0502 Small refactoring to resolve alias this.
1f52383 Merge pull request #671 from yebblies/issue4958
2a12345 fix build breakage
8755819 fix build
ba86204 fix vcbuild
464c664 fix linux build
31197c8 tweaked command line moved some inline asm to C-function to not interfere with optimizations build with VS2011
4dcdc9c increase stack size for win64 build
77262aa add missing include to root
56afe3f batch to build through win32.mak
5a0fd30 build through win32.mak
a5b5190 long_double -> longdouble remove C99 printf add Win64 support
9640110 vcbuild
b619171 Merge pull request #761 from donc/ctfe7473structref
7756328 Merge pull request #725 from kennytm/bug7399-import-too-fatal
bbac9e4 Merge pull request #759 from yebblies/issue1149
d1ff23b 7473 [CTFE] Non-ref argument behaves as if it's a ref argument
ab5cb18 Fix OPmsw codegen - integer only is too restrictive.
a00833b Merge pull request #743 from yebblies/issue3354
b006e11 Merge pull request #757 from 9rnsr/fix7562
3bccbb0 fix Issue 7562 - DMD crashes by using TemplateThisParameter
a7dc50e Merge pull request #749 from yebblies/issue1149
a873c5f Merge pull request #758 from 9rnsr/fix5525
5d639ec fix Issue 5525 - Eponymous templates should allow for overloaded eponymous members
f50852c Merge pull request #729 from donc/gag4269
de02523 fix Issue 3927 - array.length++; is an error, but ++array.length compiles
1dc5bfd Merge pull request #680 from yebblies/issue3812
cf887ba move errors to Dsymbol
fc4acf5 Merge pull request #755 from donc/seaOfErrors7557
be2f3a9 7557b soldier on through dottemplate expressions
8cec825 7557 Sea of errors after template failure
37ec6d6 A small fixup to call Type::defaultInitLiteral
7b5e2cb Revert "Revert "Merge pull request #41 from 9rnsr/rvalue-struct-literal""
3d8f09a Merge branch 'master' of github.com:D-Programming-Language/dmd
7dfb4cc Merge pull request #752 from braddr/cleanup-backend2
1b28f51 Merge branch 'master' of github.com:D-Programming-Language/dmd
31ad73c Merge pull request #746 from yebblies/issue5554
25f770d Change lexer to support # as a token, preserving #line's original behavior
dd8d20a Revert "Merge pull request #41 from 9rnsr/rvalue-struct-literal"
ee2fdf9 Merge pull request #41 from 9rnsr/rvalue-struct-literal
f94fdbf Merge pull request #750 from yebblies/issue3630
61f5fcf Improve codegen for OPmsw
05a3fa4 Merge pull request #744 from Safety0ff/avx-fix
0231d6a Merge pull request #748 from 9rnsr/fix7552
9a97979 Merge pull request #751 from donc/ctfe7536
e091e6e 7536 ctfeAdrOnStack triggered
c9edaf4 fix Issue 7552 - Cannot get and combine a part of overloaded functions
1edeba9 Fix Issue 3630 - bad error location in "has no effect in expression" error
7d0fb72 Fix Issue 5554 - [qtd] Covariance detection failure
4f36aca fix Issue 7550 - Missing AVX instruction VPMULDQ
0b82dfe Fix Issue 5879 - Not all frontend errors use stderr
963a41a Merge pull request #695 from yebblies/refactor_expression
3f06690 Fix Issue 3354 - asm fld x, ST(6); accepted
713f69f Merge pull request #677 from yebblies/issue4241
cf22ce3 Merge pull request #711 from yebblies/issue3559
56ca73c Merge pull request #700 from kennytm/bug7452_lazy_safe
c4dc723 Merge pull request #736 from ibuclaw/in_gcc
121c9b9 Merge pull request #737 from yebblies/issue7544
cedcb3c Merge pull request #740 from yebblies/issue7545
fb3e8f2 Merge pull request #741 from dawgfoto/DMCWarning
5d26c1e Merge pull request #735 from 9rnsr/fix7105
734a921 dmc warning
1e1cfbc Fix Issue 7545 - ICE(cast.c) Merge integral types through alias this
6b135be Fix Issue 7544 - ICE(interpret.c) Catching an exception with a null catch block
c5336f9 Update already existing gdc-specific code, harmonise headers.
44b8d59 Merge pull request #703 from kennytm/bug435_template_ctor
6b368e1 Merge pull request #707 from yebblies/issue3822
8439e07 Merge pull request #717 from yebblies/issue6611
2b4502e fix Issue 7105 - relax inout rules
ac4463a wildsubparam isn't need anymore, because it works properly.
f77879a Issue 6611 - better error message for array post increment/decrement
7393395 Merge pull request #716 from yebblies/issue6685
77568f0 Merge pull request #719 from yebblies/issue4536
9accb04 tired of tdata()
5fbd5a2 Merge pull request #732 from dawgfoto/fix5412
41a901a Revert "hide private/package module level symbols"
23d5e14 Merge pull request #733 from dawgfoto/HideModuleMembers
e2f8a23 hide private/package module level symbols
ae75287 detect collisions with renamed imports
75a2442 fix Dsymbol::search_correct
50e122a Merge pull request #723 from kennytm/bug7504_null_array
c5b7601 Revert "fix 7494 - selective imports in function scope"
aa6f4d9 Revert "fix Protection"
5be660e Revert "fix Imports"
040371b Revert "detect collisions with renamed imports"
0159818 Revert "find private symbols during spell correction"
0c95c45 find private symbols during spell correction
ca22fb2 detect collisions with renamed imports
0dca0af fix Imports
37d4fda fix Protection
16a2e7e fix 7494 - selective imports in function scope
c16f5b2 Merge pull request #667 from 9rnsr/fix7406
f776617 explanatory comments belong in the code, not bugzilla
bfa2060 Merge pull request #704 from donc/_error6699
f46705c fix fail222 regression
28d9635 Merge pull request #708 from donc/soldieron7481
2c2a7af Merge pull request #715 from 9rnsr/fix6738
98cfa64 Merge pull request #722 from 9rnsr/fix7353
b040567 revert pull 724
0e84f63 revert part of pull 724
400f702 Merge pull request #724 from yebblies/issue3632
d82cc74 Merge pull request #720 from yebblies/issue3279
2da3bed Merge pull request #718 from yebblies/fixdebugmsg
f6627ec 7527 [CTFE] Segfault when slicing a pointer at compile time
c8f09bf 4269a Regression(2.031): invalid type accepted if evaluated while errors are gagged
d10fba0 implement const/purity/nothrow/@safe inheritance
ad689fb Fix bug 7399: Broken import statement in trySemantic() causes silent compiler error
eb0c643 Add global.speculativeGag
c18220a Refactor isSpeculativeFunction into Dsymbol
f5c56d8 Issue 3632 - modify float is float to do a bitwise compare
af1cab4 Issue 7353 - NRVO not properly working with inferred return type
03ee438 Fix bug 7504: Cannot assign an object of type 'typeof(null)' to an array
dfb941c Remove debug printing in code that generates errors.
62118e3 Issue 4536 - Typetuples (T...) should have an .init member
989da7b Issue 3279 - Confusing error message when comparing types
36e8045 Issue 6685 - Allow using "with" with rvalues
60cbc6f fix issue 6738 revisited
4e20e7d Issue 3822 - Invalid optimization of alloca called with constant size
b37bf8c Fixes bug 435: Constructors should be templatized
ad8157d Issue 3559 - DMD 1.048+ fails to take function pointer from overloaded member functions
838cd06 7481 Compiler should 'soldier on' after template errors
673063e Simplify fix for 6699
1a0b199 6699a __error when instantiating function template
b6d072d 6699b __error in alias expression
df16ffa 6699c __error in synchronized error message
338f804 7462 Error message with _error_ in overridden function
0f60bd3 7463 Duplicated error message with bad template value parameter
f43e93a 6699E: _error inside error msg for bad base class
5109a5a Fixes bug 7452.
04d888f Refactor XxxAssignExp semantic
73973d6 Issue 3812 - Missing line number for implicit cast of variadic function to array
f0bbf18 Issue 3927 - array.length++; is an error, but ++array.length compiles
24576c2 Issue 783 - Cannot use an array w/ const or variable index as new[] size argument.
7e4cd4b Issue 4241 - duplicate union initialization error doesn't give a file location
9987127 Issue 4958 - Floating point enums should check for total loss of precision
60287fd Issue 7406 - tuple foreach doesn't work with mixed tuples
633d88e Issue 5889 - Struct literal/construction should be rvalue
5d5f78a Now function overloading with ref and non-ref parameter is legal for struct type

git-subtree-dir: dmd2
git-subtree-split: 3443f38fc4c17807a0f36005a05d598cfc7301db
2012-04-05 11:45:25 +04:00

3636 lines
111 KiB
C

// Copyright (C) 1987-1995 by Symantec
// Copyright (C) 2000-2011 by Digital Mars
// All Rights Reserved
// http://www.digitalmars.com
// Written by Walter Bright
/*
* This source file is made available for personal use
* only. The license is in /dmd/src/dmd/backendlicense.txt
* or /dm/src/dmd/backendlicense.txt
* For any other uses, please contact Digital Mars.
*/
#if !SPP
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include "cc.h"
#include "el.h"
#include "oper.h"
#include "code.h"
#include "global.h"
static char __file__[] = __FILE__; /* for tassert.h */
#include "tassert.h"
// Constants that the 8087 supports directly; each equals a value the x87
// can produce with one of its load-constant instructions.
// BUG: rewrite for 80 bit long doubles (the unsuffixed literals below are only double precision)
#define PI 3.14159265358979323846 /* pi */
#define LOG2 0.30102999566398119521 /* log10(2) */
#define LN2 0.6931471805599453094172321 /* ln(2) */
#define LOG2T 3.32192809488736234787 /* log2(10) */
#define LOG2E 1.4426950408889634074 /* log2(e) == 1/LN2 */
#define FWAIT 0x9B /* FWAIT opcode */
/* Mark variable referenced by e as not a register candidate */
/* (clears the GTregcand bit in the Sflags of the symbol e refers to) */
#define notreg(e) ((e)->EV.sp.Vsym->Sflags &= ~GTregcand)
/* Generate the appropriate ESC instruction */
/* Yields an opcode byte in 0xD8..0xDF when MF is an enum MF value (lands in bits 1-2) and b is 0 or 1 (bit 0) */
#define ESC(MF,b) (0xD8 + ((MF) << 1) + (b))
// Selector passed as the MF argument of ESC(), where it is shifted left by one
// and added into the opcode byte.
// NOTE(review): the names suggest the x87 memory-operand formats (32-bit real,
// 32-bit integer, 64-bit real, 16-bit integer) -- confirm against the Intel manual.
enum MF
{ // Values for MF
MFfloat = 0,
MFlong = 1,
MFdouble = 2,
MFword = 3
};
// Compile-time model of the 8087 register stack: entry [i] describes what the
// generated code will hold in ST(i). push87() shifts entries towards higher
// indices, pop87() shifts them towards lower indices.
NDP _8087elems[8]; // 8087 stack
// Value assigned to an entry to mark it empty. It has static storage and no
// initializer, so it is zero-initialized.
NDP ndp_zero;
int stackused = 0; /* number of items on the 8087 stack */
/*********************************
* State record holding an int 'round' and two symbol pointers.
* NOTE(review): the field names suggest cached symbols for the
* "round to zero" / "round to nearest" control words and the currently
* selected mode -- confirm at the use sites.
*/
struct Dconst
{
int round;
symbol *roundto0;
symbol *roundtonearest;
};
// File-local instance; static storage, so it starts out zeroed.
static Dconst oldd;
#define NDPP 0 // print out debugging info (set to 1 to enable the trace output guarded by NDPP)
// True when generating 64-bit code (I64) or when config.fpxmmregs is set.
#define NOSAHF (I64 || config.fpxmmregs) // can't use SAHF instruction
// Forward declarations of code generators that are referenced before they are defined.
code *loadComplex(elem *e);
code *opmod_complex87(elem *e,regm_t *pretregs);
code *opass_complex87(elem *e,regm_t *pretregs);
code * genf2(code *c,unsigned op,unsigned rm);
// Control-word values; the two differ only in bits 10-11 (mask 0xC00).
// NOTE(review): in the x87 control word those bits are the rounding-control
// field (11 = truncate, 00 = nearest) -- confirm against the Intel manual.
#define CW_roundto0 0xFBF
#define CW_roundtonearest 0x3BF
STATIC code *genrnd(code *c, short cw);
/**********************************
* When we need to temporarily save 8087 registers, we record information
* about the save into an array of NDP structs:
*/
NDP *NDP::save = NULL; /* the array itself; grown on demand by getemptyslot() */
int NDP::savemax = 0; /* # of entries in NDP::save[] */
int NDP::savetop = 0; /* # of entries used in NDP::save[] (one past the highest slot index handed out) */
/* Number of entries NDP::save[] grows by each time it runs out of free slots */
#ifdef DEBUG
#define NDPSAVEINC 2 /* flush reallocation bugs */
#else
#define NDPSAVEINC 8 /* allocation chunk sizes */
#endif
/****************************************
 * Append a store-and-pop (FSTP) of ST(0) into ndp save location i.
 * Input:
 *      c       code list to append to
 *      i       index of the save location (emitted as an FLndp operand, i[BP])
 *      ty      type of the value; only its tybasic() part selects the encoding
 * Returns:
 *      the resulting code list
 * An unsupported type trips assert(0).
 */
code *ndp_fstp(code *c, int i, tym_t ty)
{
    const unsigned rex = I64 ? (REX_W << 16) : 0;
    unsigned opcode;
    unsigned modrm;

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            opcode = 0xD9;                      // FSTP m32real i[BP]
            modrm  = modregrm(2,3,BPRM);
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            opcode = 0xDD;                      // FSTP m64real i[BP]
            modrm  = modregrm(2,3,BPRM);
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            opcode = 0xDB;                      // FSTP m80real i[BP]
            modrm  = modregrm(2,7,BPRM);
            break;

        default:
            assert(0);
            return c;                           // nothing is emitted for an unknown type
    }
    return genc1(c,opcode,rex | modrm,FLndp,i);
}
/****************************************
 * Append a load (FLD) from ndp save location i onto the 8087 stack.
 * Input:
 *      c       code list to append to
 *      i       index of the save location (emitted as an FLndp operand, i[BP])
 *      ty      type of the value; only its tybasic() part selects the encoding
 * Returns:
 *      the resulting code list
 * An unsupported type trips assert(0).
 */
code *ndp_fld(code *c, int i, tym_t ty)
{
    const unsigned rex = I64 ? (REX_W << 16) : 0;
    unsigned opcode;
    unsigned modrm;

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            opcode = 0xD9;                      // FLD m32real i[BP]
            modrm  = modregrm(2,0,BPRM);
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            opcode = 0xDD;                      // FLD m64real i[BP]
            modrm  = modregrm(2,0,BPRM);
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            opcode = 0xDB;                      // FLD m80real i[BP]
            modrm  = modregrm(2,5,BPRM);
            break;

        default:
            assert(0);
            return c;                           // nothing is emitted for an unknown type
    }
    return genc1(c,opcode,rex | modrm,FLndp,i);
}
/**************************
 * Find a free entry in NDP::save[], growing the array when every
 * existing entry is occupied. An entry is free when its .e is NULL.
 * Returns:
 *      index of the free entry; NDP::savetop is raised to cover it.
 */
STATIC int getemptyslot()
{
    int slot = 0;

    // First-fit scan over the entries that already exist
    while (slot < NDP::savemax && NDP::save[slot].e != NULL)
        slot++;

    if (slot >= NDP::savemax)
    {
        /* No free entry: extend NDP::save[] by NDPSAVEINC entries and
         * zero the new tail, so that every new entry reads as free.
         */
        NDP::save = (NDP *)mem_realloc(NDP::save,
                (NDP::savemax + NDPSAVEINC) * sizeof(*NDP::save));
        memset(NDP::save + NDP::savemax,0,NDPSAVEINC * sizeof(*NDP::save));
        slot = NDP::savemax;            // first entry of the new tail
        NDP::savemax += NDPSAVEINC;
    }

    if (NDP::savetop <= slot)
        NDP::savetop = slot + 1;
    return slot;
}
/*********************************
 * Pop 8087 stack.
 * This only updates the compile-time model: stackused is decremented and
 * every entry of _8087elems[] moves down one position, with the last entry
 * becoming empty. No code is generated here.
 * In DEBUG builds the function takes the caller's line/file, supplied by the
 * pop87() macro defined between the parameter list and the body.
 */
#undef pop87
void pop87(
#ifdef DEBUG
int line, const char *file
#endif
)
#ifdef DEBUG
#define pop87() pop87(__LINE__,__FILE__)
#endif
{
    int i;
    // 'file' and 'line' exist only in the DEBUG signature, so the trace must
    // be limited to DEBUG builds or enabling NDPP alone breaks compilation.
#if NDPP && defined(DEBUG)
    dbg_printf("pop87(%s(%d): stackused=%d)\n", file, line, stackused);
#endif
    --stackused;
    assert(stackused >= 0);
    for (i = 0; i < arraysize(_8087elems) - 1; i++)
        _8087elems[i] = _8087elems[i + 1];
    /* end of stack is nothing */
    _8087elems[arraysize(_8087elems) - 1] = ndp_zero;
}
/*******************************
 * Make room for one more value on the modelled 8087 stack.
 * If the deepest entry (_8087elems[7]) is occupied, code is generated that
 * stores that value into an NDP::save[] slot so that it is not lost;
 * otherwise only stackused is bumped. In both cases the model is shifted
 * by one and the new top entry is left empty.
 * Returns:
 *      the generated spill code, or CNIL when nothing had to be spilled.
 * In DEBUG builds the function takes the caller's line/file, supplied by the
 * push87() macro defined between the parameter list and the body.
 */
#undef push87
#ifdef DEBUG
code *push87(int line, const char *file);
code *push87() { return push87(__LINE__,__FILE__); }
#endif
code *push87(
#ifdef DEBUG
int line, const char *file
#endif
)
#ifdef DEBUG
#define push87() push87(__LINE__,__FILE__)
#endif
{
    code *cspill = CNIL;

    if (_8087elems[7].e == NULL)
    {
        // There is still a free register: just account for it
#ifdef DEBUG
        if (NDPP) dbg_printf("push87(%s(%d): %d)\n", file, line, stackused);
#endif
        stackused++;
        assert(stackused <= 8);
    }
    else
    {
        // The deepest register is live: record it in a save slot and spill it
        const int slot = getemptyslot();
        NDP::save[slot] = _8087elems[7];
        cspill = genf2(cspill,0xD9,0xF6);                               // FDECSTP
        cspill = genfwait(cspill);
        cspill = ndp_fstp(cspill, slot, _8087elems[7].e->Ety);          // FSTP slot[BP]
        assert(stackused == 8);
        if (NDPP) dbg_printf("push87() : overflow\n");
    }

    // Every entry moves one position deeper; the new top is empty
    for (int k = 7; k >= 1; k--)
        _8087elems[k] = _8087elems[k - 1];
    _8087elems[0] = ndp_zero;

    return cspill;
}
/*****************************
 * Note elem e as being in ST(i) as being a value we want to keep.
 * Input:
 *      e       elem whose value is held in the register
 *      offset  recorded alongside e to tell apart several values that belong
 *              to the same elem
 *      i       8087 register number; must be less than stackused
 * In DEBUG builds the real function takes the caller's line number, supplied
 * by the note87() macro; a three-argument forwarder is also provided.
 */
#ifdef DEBUG
void note87(elem *e, unsigned offset, int i, int linnum);
void note87(elem *e, unsigned offset, int i)
{
    return note87(e, offset, i, 0);
}
void note87(elem *e, unsigned offset, int i, int linnum)
#define note87(e,offset,i) note87(e,offset,i,__LINE__)
#else
void note87(elem *e, unsigned offset, int i)
#endif
{
    // 'linnum' exists only in the DEBUG signature, so the trace must be
    // limited to DEBUG builds or enabling NDPP alone breaks compilation.
#if NDPP && defined(DEBUG)
    printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,stackused,linnum);
#endif
#if 0 && DEBUG
    if (_8087elems[i].e)
        printf("_8087elems[%d].e = %p\n",i,_8087elems[i].e);
#endif
    //if (i >= stackused) *(char*)0=0;
    assert(i < stackused);
    _8087elems[i].e = e;
    _8087elems[i].offset = offset;
}
/****************************************************
* Exchange two entries in 8087 stack.
*/
void xchg87(int i, int j)
{
NDP save;
save = _8087elems[i];
_8087elems[i] = _8087elems[j];
_8087elems[j] = save;
}
/****************************
* Make sure that elem e is in register ST(i). Reload it if necessary.
* If the model already shows (e, offset) in ST(i), nothing is generated.
* Otherwise the value is looked up in NDP::save[] and code is generated to
* load it back from its save slot.
* Input:
* e elem whose value is wanted (must not be NULL)
* offset must match the offset the value was noted/saved with
* i 0..3 8087 register number
* flag 1 don't bother with FXCH
* Returns:
* the generated reload code, or CNIL if the value was already in place.
* In DEBUG builds the function takes the caller's line number, supplied by
* the makesure87() macro.
*/
#ifdef DEBUG
STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag,int linnum)
#define makesure87(e,offset,i,flag) makesure87(e,offset,i,flag,__LINE__)
#else
STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag)
#endif
{
code *c;
int j;
#ifdef DEBUG
if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);
#endif
assert(e && i < 4);
c = CNIL;
// Re-entered from the search loop below after e has been replaced by e->E2
L1:
if (_8087elems[i].e != e || _8087elems[i].offset != offset)
{
#ifdef DEBUG
if (_8087elems[i].e)
printf("_8087elems[%d].e = %p, .offset = %d\n",i,_8087elems[i].e,_8087elems[i].offset);
#endif
// The value is not in ST(i); that register must not hold something else
assert(_8087elems[i].e == NULL);
// Search the save slots for (e, offset); j is the matching slot on exit
for (j = 0; 1; j++)
{
// Not found among the used slots: for a comma expression, start over
// with its right operand (including the "already in ST(i)" check)
if (j >= NDP::savetop && e->Eoper == OPcomma)
{
e = e->E2; // try right side
goto L1;
}
#ifdef DEBUG
if (j >= NDP::savetop)
printf("e = %p, NDP::savetop = %d\n",e,NDP::savetop);
#endif
// The value has to be in one of the save slots
assert(j < NDP::savetop);
//printf("\tNDP::save[%d] = %p, .offset = %d\n", j, NDP::save[j].e, NDP::save[j].offset);
if (e == NDP::save[j].e && offset == NDP::save[j].offset)
break;
}
// Make room on the modelled stack (this may itself generate spill code)
c = push87();
c = genfwait(c);
c = ndp_fld(c, j, e->Ety); // FLD j[BP]
if (!(flag & 1))
{
// Emit FXCH ST(i), FXCH ST(i-1), ... FXCH ST(1)
while (i != 0)
{
// Return value ignored; c already holds at least the FLD at this point.
// NOTE(review): assumes genf2 appends to a non-empty list without changing its head -- confirm.
genf2(c,0xD9,0xC8 + i); // FXCH ST(i)
i--;
}
}
NDP::save[j] = ndp_zero; // back in 8087
}
//_8087elems[i].e = NULL;
return c;
}
/****************************
 * Spill to memory the values on the 8087 stack that we want to keep.
 * Starting from the top of stack, each noted value is recorded in an
 * NDP::save[] slot, stored with FSTP and popped from the model. This stops
 * at the first top-of-stack entry that has no elem noted, or when the
 * stack is empty.
 * Returns:
 *      the generated store code, or CNIL if nothing was stored.
 */
code *save87()
{
    code *cstores = CNIL;

    for (;;)
    {
        if (!_8087elems[0].e || !stackused)
            break;

        /* Record where the top of stack goes, then store it */
        const int slot = getemptyslot();
        if (NDPP) printf("saving %p in temporary NDP::save[%d]\n",_8087elems[0].e,slot);
        NDP::save[slot] = _8087elems[0];
        cstores = genfwait(cstores);
        cstores = ndp_fstp(cstores,slot,_8087elems[0].e->Ety);  // FSTP slot[BP]
        pop87();
    }

    /* If anything was stored, wait for the last store to finish */
    if (cstores)
        genfwait(cstores);
    return cstores;
}
/******************************************
 * Save any noted values that would be destroyed by n pushes.
 * Input:
 *      n       number of pushes that are about to happen (at most 7)
 * Returns:
 *      the generated code, or NULL if the stack is shallow enough already.
 * Afterwards at most 8 - n entries remain on the modelled stack.
 */
code *save87regs(unsigned n)
{
    code *c = NULL;

    assert(n <= 7);
    const unsigned keep = 8 - n;        // stack depth that is left after saving
    if (!(stackused > keep))
        return c;                       // the n pushes already fit

    unsigned depth;

    /* Step the stack pointer back once per register position beyond 'keep'
     * (deepest first), storing the positions that are actually in use.
     */
    depth = 8;
    while (depth > keep)
    {
        c = genf2(c,0xD9,0xF6);                         // FDECSTP
        c = genfwait(c);
        if (depth <= stackused)
        {
            NDP &victim = _8087elems[depth - 1];
            int slot = getemptyslot();

            c = ndp_fstp(c, slot, victim.e->Ety);       // FSTP slot[BP]
            NDP::save[slot] = victim;
            victim = ndp_zero;
        }
        depth--;
    }

    /* Positions that were not in use were stepped over without a popping
     * store; step the stack pointer forward again once for each of them.
     */
    for (depth = 8; depth > keep; depth--)
    {
        if (!(depth > stackused))
            continue;
        c = genf2(c,0xD9,0xF7);                         // FINCSTP
        c = genfwait(c);
    }

    stackused = keep;
    return c;
}
/*****************************************************
 * Save/restore ST0 or ST01
 * Input:
 *      regm            mST0 (one register) or mST01 (two registers)
 *      csave           code list that the FSTP stores are appended to
 *      crestore        code list that the matching FLD loads are put in front of
 * Every register gets its own NDP::save[] slot and is transferred as a
 * long double.
 */
void gensaverestore87(regm_t regm, code **csave, code **crestore)
{
    //printf("gensaverestore87(%s)\n", regm_str(regm));
    code *cstore = *csave;
    code *cload = *crestore;

    assert(regm == mST0 || regm == mST01);
    const int nregs = (regm == mST01) ? 2 : 1;

    for (int r = 0; r < nregs; r++)
    {
        const int slot = getemptyslot();

        // A fresh elem marks the slot as taken; this blocks the slot
        // for the life of the function being compiled
        NDP::save[slot].e = el_calloc();
        cstore = ndp_fstp(cstore, slot, TYldouble);
        cload = cat(ndp_fld(CNIL, slot, TYldouble), cload);
    }

    *csave = cstore;
    *crestore = cload;
}
/*************************************
 * Find which, if any, entry of the modelled 8087 stack holds elem e
 * with the given offset. Only the first stackused entries are examined.
 * Returns:
 *      register number of the first match, or -1 if there is none.
 */
STATIC int cse_get(elem *e, unsigned offset)
{
    for (int reg = 0; reg != stackused; reg++)
    {
        const NDP &entry = _8087elems[reg];

        if (entry.e == e && entry.offset == offset)
            return reg;                 // cse found
    }
    return -1;                          // cse not found
}
/*************************************
 * Reload common subexpression.
 * If the value of e is still noted on the modelled 8087 stack, it is
 * duplicated onto the top of stack with FLD ST(n); otherwise it is loaded
 * again with loaddata(). For complex types both parts (noted at offset 0
 * and at half the type size) must be on the stack for the duplication.
 * Input:
 *      e               the common subexpression; freed before returning
 *      pretregs        registers the result is wanted in
 * Returns:
 *      the generated code
 */
code *comsub87(elem *e,regm_t *pretregs)
{
    code *c;

    //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Look on 8087 stack
    const int first = cse_get(e, 0);

    if (!tycomplex(e->Ety))
    {
        if (first < 0)
        {
            // Reload
            c = loaddata(e,pretregs);
        }
        else
        {
            c = push87();
            c = genf2(c,0xD9,0xC0 + first);             // FLD ST(first)
            c = cat(c, (*pretregs & XMMREGS)
                        ? fixresult87(e,mST0,pretregs)
                        : fixresult(e,mST0,pretregs));
        }
    }
    else
    {
        unsigned sz = tysize(e->Ety);
        const int second = cse_get(e, sz / 2);

        if (first < 0 || second < 0)
        {
            // Reload
            c = loaddata(e,pretregs);
        }
        else
        {
            c = push87();
            c = cat(c, push87());
            c = genf2(c,0xD9,0xC0 + first);             // FLD ST(first)
            // the FLD above moved every register one position deeper
            c = genf2(c,0xD9,0xC0 + second + 1);        // FLD ST(second + 1)
            c = cat(c,fixresult_complex87(e,mST01,pretregs));
        }
    }

    freenode(e);
    return c;
}
/**************************
 * Generate code to deal with floatreg.
 *
 * Sets the floatreg and reflocal flags, then appends an instruction whose
 * operand is FLfltreg at the given offset (BP-relative addressing mode).
 * Opcodes 0xD8..0xDF are preceded by genfwait().
 * Params:
 *      c      = code list to append to
 *      opcode = instruction opcode
 *      reg    = value for the reg field of the mod/rm byte
 *      offset = offset within floatreg
 * Returns:
 *      result of genc1()
 */
code * genfltreg(code *c,unsigned opcode,unsigned reg,targ_size_t offset)
{
    reflocal = TRUE;
    floatreg = TRUE;

    const int isEscape = (opcode >= 0xD8 && opcode <= 0xDF);
    code *head = isEscape ? genfwait(c) : c;

    return genc1(head,opcode,modregxrm(2,reg,BPRM),FLfltreg,offset);
}
/*******************************
 * Decide if we need to gen an FWAIT.
 * Returns:
 *      c with an FWAIT appended when ADDFWAIT() is true,
 *      otherwise c unchanged
 */
code *genfwait(code *c)
{
    return ADDFWAIT() ? gen1(c,FWAIT) : c;
}
/***************************************
 * Generate floating point instruction.
 *
 * Appends op/rm to c, preceded by whatever genfwait() decides to emit.
 */
code * genf2(code *c,unsigned op,unsigned rm)
{
    code *head = genfwait(c);
    return gen2(head,op,rm);
}
/***************************
 * Put the 8087 flags into the CPU flags.
 *
 * Claims AX via getregs(), gets the status word into AH (directly with
 * FSTSW AX on 80286 and later, through floatreg otherwise), then emits SAHF
 * and marks the code with CFpsw.
 * Returns:
 *      c with the sequence appended
 */
STATIC code * cg87_87topsw(code *c)
{
    /* Note that SAHF is not available on some early I64 processors
     * and will cause a seg fault
     */
    code *seq = cat(c,getregs(mAX));

    if (config.target_cpu < TARGET_80286)
    {
        seq = genfltreg(seq,0xD8+5,7,0);        /* FSTSW floatreg[BP]       */
        genfwait(seq);                          /* FWAIT                    */
        genfltreg(seq,0x8A,4,1);                /* MOV AH,floatreg+1[BP]    */
    }
    else
    {
        seq = genf2(seq,0xDF,0xE0);             // FSTSW AX
    }

    gen1(seq,0x9E);                             // SAHF
    code_orflag(seq,CFpsw);
    return seq;
}
/***************************
 * Set the PSW based on the state of ST0.
 * Input:
 *      pop     if stack should be popped after test
 * Returns:
 *      start of code appended to c.
 *
 * Four strategies, chosen in this order:
 *   - NOSAHF: FLDZ + FUCOMIP
 *   - fast floating point: FTST, then transfer the status word
 *   - 80386 and later: FLDZ + FUCOMP/FUCOMPP, then transfer the status word
 *   - otherwise: call the CLIBftest library routine
 */
STATIC code * genftst(code *c,elem *e,int pop)
{
    // Cleared by the one strategy whose compare instruction already pops
    // ST0 itself; every other strategy needs a separate FPOP when pop is set.
    int separatePop = 1;

    if (NOSAHF)
    {
        c = cat(c,push87());
        c = gen2(c,0xD9,0xEE);                  // FLDZ
        gen2(c,0xDF,0xE9);                      // FUCOMIP ST1
        pop87();
    }
    else if (config.flags4 & CFG4fastfloat)     // if fast floating point
    {
        c = genf2(c,0xD9,0xE4);                 // FTST
        c = cg87_87topsw(c);                    // put 8087 flags in CPU flags
    }
    else if (config.target_cpu >= TARGET_80386)
    {
        // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
        c = cat(c,push87());
        c = gen2(c,0xD9,0xEE);                  // FLDZ
        gen2(c,pop ? 0xDA : 0xDD,0xE9);         // FUCOMPP / FUCOMP
        pop87();
        if (pop)
            pop87();
        cg87_87topsw(c);                        // put 8087 flags in CPU flags
        separatePop = 0;
    }
    else
    {
        // Call library function which does not raise exceptions
        regm_t regm = 0;
        c = cat(c,callclib(e,CLIBftest,&regm,0));
    }

    if (pop && separatePop)
    {
        c = genf2(c,0xDD,modregrm(3,3,0));      // FPOP
        pop87();
    }
    return c;
}
/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 * Input:
 *      im      0       load real part
 *              1       load imaginary part
 * Returns:
 *      opcode if found
 *      0 if not
 *
 * The returned value is the second opcode byte of FLDZ, FLD1, FLDPI,
 * FLDL2T, FLDL2E, FLDLG2 or FLDLN2 (callers in this file emit it after 0xD9).
 * The value is compared at the precision of e's type (float, double or
 * long double).
 */
unsigned char loadconst(elem *e, int im)
#if __DMC__
__in
{
    elem_debug(e);
    assert(im == 0 || im == 1);
}
__body
#endif
{
    static float fval[7] =
        {0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
    static double dval[7] =
        {0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
    static longdouble ldval[7] =
#if __DMC__    // from math.h
        {0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L};
#elif _MSC_VER // struct longdouble constants
        {ld_zero, ld_one, ld_pi, ld_log2t, ld_log2e, ld_log2, ld_ln2};
#else          // C99 hexadecimal floats (GCC, CLANG, ...)
#define M_PI_L          0x1.921fb54442d1846ap+1L        // 3.14159 fldpi
#define M_LOG2T_L       0x1.a934f0979a3715fcp+1L        // 3.32193 fldl2t
#define M_LOG2E_L       0x1.71547652b82fe178p+0L        // 1.4427 fldl2e
#define M_LOG2_L        0x1.34413509f79fef32p-2L        // 0.30103 fldlg2
#define M_LN2_L         0x1.62e42fefa39ef358p-1L        // 0.693147 fldln2
        {0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L};
#endif
    // Entries are above 0x7F, so the table must be unsigned char: with plain
    // (signed) char the braced initializer is a narrowing conversion.
    // The trailing 0 is the "not found" result when the search loop runs out.
    static const unsigned char opcode[7 + 1] =
        /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
        {0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0};
    int i;
    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;
    int zero;
    void *p;
    static char zeros[sizeof(longdouble)];

    // Fetch the requested part at the precision of e's type; p/sz describe
    // its bytes for the all-zero-bits test below.
    if (im == 0)
    {
        switch (tybasic(e->Ety))
        {
            case TYfloat:
            case TYifloat:
            case TYcfloat:
                f = e->EV.Vfloat;
                sz = 4;
                p = &f;
                break;
            case TYdouble:
            case TYdouble_alias:
            case TYidouble:
            case TYcdouble:
                d = e->EV.Vdouble;
                sz = 8;
                p = &d;
                break;
            case TYldouble:
            case TYildouble:
            case TYcldouble:
                ld = e->EV.Vldouble;
                sz = 10;
                p = &ld;
                break;
            default:
                assert(0);
        }
    }
    else
    {
        switch (tybasic(e->Ety))
        {
            case TYcfloat:
                f = e->EV.Vcfloat.im;
                sz = 4;
                p = &f;
                break;
            case TYcdouble:
                d = e->EV.Vcdouble.im;
                sz = 8;
                p = &d;
                break;
            case TYcldouble:
                ld = e->EV.Vcldouble.im;
                sz = 10;
                p = &ld;
                break;
            default:
                assert(0);
        }
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    zero = (memcmp(p, zeros, sz) == 0);
    if (zero && config.target_cpu >= TARGET_PentiumPro)
        return 0xEE;            // FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
        return 0;

    if (zero)
        return 0xEE;

    // Search the remaining constants; index 0 (zero) was handled bitwise
    // above so that -0 is never matched.
    for (i = 1; i < arraysize(fval); i++)
    {
        switch (sz)
        {
            case 4:
                if (fval[i] != f)
                    continue;
                break;
            case 8:
                if (dval[i] != d)
                    continue;
                break;
            case 10:
                if (ldval[i] != ld)
                    continue;
                break;
            default:
                assert(0);
        }
        break;
    }
    return opcode[i];
}
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Handles moves between ST0, the general registers, the XMM registers and
 * the flags, going through the floatreg memory temporary (genfltreg) when
 * the value changes register file.
 * Params:
 *      e        = expression whose result is being moved
 *      retregs  = where the result currently is
 *      pretregs = in/out: where the result is wanted
 * Returns:
 *      generated code
 */
code *fixresult87(elem *e,regm_t retregs,regm_t *pretregs)
{
regm_t regm;
tym_t tym;
code *c1,*c2;
unsigned sz;
//printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
//printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
assert(!*pretregs || retregs);
c1 = CNIL;
c2 = CNIL;
tym = tybasic(e->Ety);
sz = tysize[tym];
//printf("tym = x%x, sz = %d\n", tym, sz);
// A result wanted in the ST0/ST1 pair is handled by the complex variant
if (*pretregs & mST01)
return fixresult_complex87(e, retregs, pretregs);
/* if retregs needs to be transferred into the 8087 */
if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
{
assert(sz <= DOUBLESIZE);
if (!I16)
{
if (*pretregs & mPSW)
{ // Set flags
regm_t r = retregs | mPSW;
c1 = fixresult(e,retregs,&r);
}
c2 = push87();
// NOTE(review): with I64 this branch is presumably also taken when
// sz == REGSIZE is 8, yet the store below has no REX_W and the load is
// annotated as a float load - confirm that combination cannot occur.
if (sz == REGSIZE || (I64 && sz == 4))
{
unsigned reg = findreg(retregs);
c2 = genfltreg(c2,0x89,reg,0); // MOV fltreg,reg
genfltreg(c2,0xD9,0,0); // FLD float ptr fltreg
}
else
{ unsigned msreg,lsreg;
// Value occupies a register pair: store both halves, load as a double
msreg = findregmsw(retregs);
lsreg = findreglsw(retregs);
c2 = genfltreg(c2,0x89,lsreg,0); // MOV fltreg,lsreg
genfltreg(c2,0x89,msreg,4); // MOV fltreg+4,msreg
genfltreg(c2,0xDD,0,0); // FLD double ptr fltreg
}
}
else
{
// 16 bit code: move the value into FLOATREGS/DOUBLEREGS and call the
// CLIBfltto87/CLIBdblto87 library routine
regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
regm |= *pretregs & mPSW;
c1 = fixresult(e,retregs,&regm);
regm = 0; // don't worry about result from CLIBxxx
c2 = callclib(e,
((sz == FLOATSIZE) ? CLIBfltto87 : CLIBdblto87),
&regm,0);
}
}
// Result is in ST0 but wanted in general register(s)
else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
{ unsigned mf;
unsigned reg;
assert(sz <= DOUBLESIZE);
mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
// Flags wanted too: test ST0 (without popping) before it is stored
if (*pretregs & mPSW && !(retregs & mPSW))
c1 = genftst(c1,e,0);
/* FSTP floatreg */
pop87();
c1 = genfltreg(c1,ESC(mf,1),3,0);
genfwait(c1);
c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
// Read floatreg back into the allocated register(s), one MOV (0x8B)
// per register
if (sz == FLOATSIZE)
{
if (!I16)
c2 = genfltreg(c2,0x8B,reg,0);
else
{ c2 = genfltreg(c2,0x8B,reg,REGSIZE);
genfltreg(c2,0x8B,findreglsw(*pretregs),0);
}
}
else
{ assert(sz == DOUBLESIZE);
if (I16)
{ c2 = genfltreg(c2,0x8B,AX,6);
genfltreg(c2,0x8B,BX,4);
genfltreg(c2,0x8B,CX,2);
genfltreg(c2,0x8B,DX,0);
}
else if (I32)
{ c2 = genfltreg(c2,0x8B,reg,REGSIZE);
genfltreg(c2,0x8B,findreglsw(*pretregs),0);
}
else // I64
{
c2 = genfltreg(c2,0x8B,reg,0);
code_orrex(c2, REX_W);
}
}
}
// Result is in ST0 and not wanted anywhere: just pop it
else if (*pretregs == 0 && retregs == mST0)
{
c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else
{ if (*pretregs & mPSW)
{ if (!(retregs & mPSW))
{ assert(retregs & mST0);
// Pop ST0 as part of the test unless the value itself is still
// wanted in ST0 or an XMM register
c1 = genftst(c1,e,!(*pretregs & (mST0 | XMMREGS))); // FTST
}
}
// XMM register -> ST0, through floatreg
if (*pretregs & mST0 && retregs & XMMREGS)
{
assert(sz <= DOUBLESIZE);
unsigned mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
// MOVD floatreg,XMM?
unsigned reg = findreg(retregs);
c1 = genfltreg(c1,xmmstore(tym),reg - XMM0,0);
c2 = push87();
c2 = genfltreg(c2,ESC(mf,1),0,0); // FLD float/double ptr fltreg
}
// ST0 -> XMM register, through floatreg
else if (retregs & mST0 && *pretregs & XMMREGS)
{
assert(sz <= DOUBLESIZE);
unsigned mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
// FSTP floatreg
pop87();
c1 = genfltreg(c1,ESC(mf,1),3,0);
genfwait(c1);
// MOVD XMM?,floatreg
unsigned reg;
c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
c2 = genfltreg(c2,xmmload(tym),reg -XMM0,0);
}
else
assert(!(*pretregs & mST0) || (retregs & mST0));
}
// Record that ST0 now holds e
if (*pretregs & mST0)
note87(e,0,0);
return cat(c1,c2);
}
/********************************
* Generate in-line 8087 code for the following operators:
* add
* min
* mul
* div
* cmp
*/
// Reverse the order that the op is done in.
// Indexed by op + 1, so that op == -1 ("no operation") maps to -1.
// Ops 4 and 5 swap with each other, as do 6 and 7; ops 0..3 map to themselves.
// Explicitly signed: the -1 entry must stay negative even on hosts where
// plain char is unsigned, because the result is used as an index again.
static const signed char oprev[9] = { -1,0,1,2,3,5,4,7,6 };
/* Generate 8087 code for the binary operator e (add, min, mul, div, mod
 * and the relational operators), dispatching on the operator and the basic
 * types of both operands.
 * Params:
 *      e        = the binary operator elem
 *      pretregs = in/out: where the result is wanted; must not be 0
 * Returns:
 *      generated code
 */
code *orth87(elem *e,regm_t *pretregs)
{
unsigned op;
code *c1,*c2,*c3,*c4;
code *cx;
regm_t retregs;
regm_t resregm;
elem *e1;
elem *e2;
int e2oper;
int eoper;
unsigned sz2;
int clib = CLIBMAX; // initialize to invalid value
int reverse = 0;
//printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
#if 1 // we could be evaluating / for side effects only
assert(*pretregs != 0);
#endif
retregs = mST0;
resregm = mST0;
e1 = e->E1;
e2 = e->E2;
c3 = CNIL;
c4 = CNIL;
// sz2 is the size of one component of e1 (half the size for a complex type)
sz2 = tysize(e1->Ety);
if (tycomplex(e1->Ety))
sz2 /= 2;
eoper = e->Eoper;
if (eoper == OPmul && e2->Eoper == OPconst && el_toldouble(e->E2) == 2.0L)
{
// Perform "mul 2.0" as fadd ST(0), ST
c1 = codelem(e1,&retregs,FALSE);
c1 = genf2(c1, 0xDC, 0xC0); // fadd ST(0), ST;
c2 = fixresult87(e,mST0,pretregs); // result is in ST(0).
freenode(e2);
return cat(c1,c2);
}
// All relational operators share the OPeqeq cases of the dispatch below;
// the actual operator is still read from e->Eoper where it matters.
if (OTrel(eoper))
eoper = OPeqeq;
// X packs the operator and both operand types into one switch value
#define X(op, ty1, ty2) (((op) << 16) + (ty1) * 256 + (ty2))
switch (X(eoper, tybasic(e1->Ety), tybasic(e2->Ety)))
{
// Real/real and imaginary/imaginary arithmetic: only select op here, the
// code is generated after the switch.
case X(OPadd, TYfloat, TYfloat):
case X(OPadd, TYdouble, TYdouble):
case X(OPadd, TYdouble_alias, TYdouble_alias):
case X(OPadd, TYldouble, TYldouble):
case X(OPadd, TYldouble, TYdouble):
case X(OPadd, TYdouble, TYldouble):
case X(OPadd, TYifloat, TYifloat):
case X(OPadd, TYidouble, TYidouble):
case X(OPadd, TYildouble, TYildouble):
op = 0; // FADDP
break;
case X(OPmin, TYfloat, TYfloat):
case X(OPmin, TYdouble, TYdouble):
case X(OPmin, TYdouble_alias, TYdouble_alias):
case X(OPmin, TYldouble, TYldouble):
case X(OPmin, TYldouble, TYdouble):
case X(OPmin, TYdouble, TYldouble):
case X(OPmin, TYifloat, TYifloat):
case X(OPmin, TYidouble, TYidouble):
case X(OPmin, TYildouble, TYildouble):
op = 4; // FSUBP
break;
case X(OPmul, TYfloat, TYfloat):
case X(OPmul, TYdouble, TYdouble):
case X(OPmul, TYdouble_alias, TYdouble_alias):
case X(OPmul, TYldouble, TYldouble):
case X(OPmul, TYldouble, TYdouble):
case X(OPmul, TYdouble, TYldouble):
case X(OPmul, TYifloat, TYifloat):
case X(OPmul, TYidouble, TYidouble):
case X(OPmul, TYildouble, TYildouble):
case X(OPmul, TYfloat, TYifloat):
case X(OPmul, TYdouble, TYidouble):
case X(OPmul, TYldouble, TYildouble):
case X(OPmul, TYifloat, TYfloat):
case X(OPmul, TYidouble, TYdouble):
case X(OPmul, TYildouble, TYldouble):
op = 1; // FMULP
break;
case X(OPdiv, TYfloat, TYfloat):
case X(OPdiv, TYdouble, TYdouble):
case X(OPdiv, TYdouble_alias, TYdouble_alias):
case X(OPdiv, TYldouble, TYldouble):
case X(OPdiv, TYldouble, TYdouble):
case X(OPdiv, TYdouble, TYldouble):
case X(OPdiv, TYifloat, TYifloat):
case X(OPdiv, TYidouble, TYidouble):
case X(OPdiv, TYildouble, TYildouble):
op = 6; // FDIVP
break;
case X(OPmod, TYfloat, TYfloat):
case X(OPmod, TYdouble, TYdouble):
case X(OPmod, TYdouble_alias, TYdouble_alias):
case X(OPmod, TYldouble, TYldouble):
case X(OPmod, TYfloat, TYifloat):
case X(OPmod, TYdouble, TYidouble):
case X(OPmod, TYldouble, TYildouble):
case X(OPmod, TYifloat, TYifloat):
case X(OPmod, TYidouble, TYidouble):
case X(OPmod, TYildouble, TYildouble):
case X(OPmod, TYifloat, TYfloat):
case X(OPmod, TYidouble, TYdouble):
case X(OPmod, TYildouble, TYldouble):
// No single instruction for modulo; the FPREM loop after the switch is
// selected by e->Eoper == OPmod
op = (unsigned) -1;
break;
// Non-complex comparisons: fully generated here, result goes to the flags
case X(OPeqeq, TYfloat, TYfloat):
case X(OPeqeq, TYdouble, TYdouble):
case X(OPeqeq, TYdouble_alias, TYdouble_alias):
case X(OPeqeq, TYldouble, TYldouble):
case X(OPeqeq, TYifloat, TYifloat):
case X(OPeqeq, TYidouble, TYidouble):
case X(OPeqeq, TYildouble, TYildouble):
assert(OTrel(e->Eoper));
assert((*pretregs & mST0) == 0);
c1 = codelem(e1,&retregs,FALSE);
note87(e1,0,0);
resregm = mPSW;
if (rel_exception(e->Eoper) || config.flags4 & CFG4fastfloat)
{
// Comparison against a constant zero: test ST0 instead of loading e2
if (cnst(e2) && !boolres(e2))
{
if (NOSAHF)
{
c1 = cat(c1,push87());
c1 = gen2(c1,0xD9,0xEE); // FLDZ
gen2(c1,0xDF,0xF1); // FCOMIP ST1
pop87();
}
else
{ c1 = genf2(c1,0xD9,0xE4); // FTST
c1 = cg87_87topsw(c1);
}
c2 = genf2(NULL,0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else if (NOSAHF)
{
note87(e1,0,0);
c2 = load87(e2,0,&retregs,e1,-1);
c2 = cat(c2,makesure87(e1,0,1,0));
resregm = 0;
//c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST1
c2 = gen2(c2,0xDF,0xF1); // FCOMIP ST1
pop87();
genf2(c2,0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else
{
c2 = load87(e2, 0, pretregs, e1, 3); // FCOMPP
}
}
else
{
// Comparison against a constant zero on a pre-80386 target goes
// through the CLIBftest0 library routine
if (cnst(e2) && !boolres(e2) &&
config.target_cpu < TARGET_80386)
{
regm_t regm = 0;
c2 = callclib(e,CLIBftest0,&regm,0);
pop87();
}
else
{
note87(e1,0,0);
c2 = load87(e2,0,&retregs,e1,-1);
c2 = cat(c2,makesure87(e1,0,1,0));
resregm = 0;
if (NOSAHF)
{
c3 = gen2(CNIL,0xDF,0xE9); // FUCOMIP ST1
pop87();
genf2(c3,0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else if (config.target_cpu >= TARGET_80386)
{
c3 = gen2(CNIL,0xDA,0xE9); // FUCOMPP
c3 = cg87_87topsw(c3);
pop87();
pop87();
}
else
// Call a function instead so that exceptions
// are not generated.
c3 = callclib(e,CLIBfcompp,&resregm,0);
}
}
freenode(e2);
return cat4(c1,c2,c3,c4);
// Arithmetic with at least one complex operand: the cases below pick a
// library routine (clib) where needed and jump to a shared code sequence.
case X(OPadd, TYcfloat, TYcfloat):
case X(OPadd, TYcdouble, TYcdouble):
case X(OPadd, TYcldouble, TYcldouble):
case X(OPadd, TYcfloat, TYfloat):
case X(OPadd, TYcdouble, TYdouble):
case X(OPadd, TYcldouble, TYldouble):
case X(OPadd, TYfloat, TYcfloat):
case X(OPadd, TYdouble, TYcdouble):
case X(OPadd, TYldouble, TYcldouble):
goto Lcomplex;
case X(OPadd, TYifloat, TYcfloat):
case X(OPadd, TYidouble, TYcdouble):
case X(OPadd, TYildouble, TYcldouble):
goto Lcomplex2;
case X(OPmin, TYcfloat, TYcfloat):
case X(OPmin, TYcdouble, TYcdouble):
case X(OPmin, TYcldouble, TYcldouble):
case X(OPmin, TYcfloat, TYfloat):
case X(OPmin, TYcdouble, TYdouble):
case X(OPmin, TYcldouble, TYldouble):
case X(OPmin, TYfloat, TYcfloat):
case X(OPmin, TYdouble, TYcdouble):
case X(OPmin, TYldouble, TYcldouble):
goto Lcomplex;
case X(OPmin, TYifloat, TYcfloat):
case X(OPmin, TYidouble, TYcdouble):
case X(OPmin, TYildouble, TYcldouble):
goto Lcomplex2;
case X(OPmul, TYcfloat, TYcfloat):
case X(OPmul, TYcdouble, TYcdouble):
case X(OPmul, TYcldouble, TYcldouble):
clib = CLIBcmul;
goto Lcomplex;
case X(OPdiv, TYcfloat, TYcfloat):
case X(OPdiv, TYcdouble, TYcdouble):
case X(OPdiv, TYcldouble, TYcldouble):
case X(OPdiv, TYfloat, TYcfloat):
case X(OPdiv, TYdouble, TYcdouble):
case X(OPdiv, TYldouble, TYcldouble):
case X(OPdiv, TYifloat, TYcfloat):
case X(OPdiv, TYidouble, TYcdouble):
case X(OPdiv, TYildouble, TYcldouble):
clib = CLIBcdiv;
goto Lcomplex;
case X(OPdiv, TYifloat, TYfloat):
case X(OPdiv, TYidouble, TYdouble):
case X(OPdiv, TYildouble, TYldouble):
op = 6; // FDIVP
break;
// Both operands loaded as full complex values (four stack entries); add and
// subtract are done inline, anything else through the routine in clib.
Lcomplex:
c1 = loadComplex(e1);
c2 = loadComplex(e2);
c3 = makesure87(e1, sz2, 2, 0);
c3 = cat(c3,makesure87(e1, 0, 3, 0));
retregs = mST01;
if (eoper == OPadd)
{
c4 = genf2(NULL, 0xDE, 0xC0+2); // FADDP ST(2),ST
genf2(c4, 0xDE, 0xC0+2); // FADDP ST(2),ST
pop87();
pop87();
}
else if (eoper == OPmin)
{
c4 = genf2(NULL, 0xDE, 0xE8+2); // FSUBP ST(2),ST
genf2(c4, 0xDE, 0xE8+2); // FSUBP ST(2),ST
pop87();
pop87();
}
else
c4 = callclib(e, clib, &retregs, 0);
c4 = cat(c4, fixresult_complex87(e, retregs, pretregs));
return cat4(c1,c2,c3,c4);
// imaginary +/- complex: e1 is loaded as a single value, e2 as a complex pair
Lcomplex2:
retregs = mST0;
c1 = codelem(e1, &retregs, FALSE);
note87(e1, 0, 0);
c2 = loadComplex(e2);
c3 = makesure87(e1, 0, 2, 0);
retregs = mST01;
if (eoper == OPadd)
{
c4 = genf2(NULL, 0xDE, 0xC0+2); // FADDP ST(2),ST
}
else if (eoper == OPmin)
{
c4 = genf2(NULL, 0xDE, 0xE8+2); // FSUBP ST(2),ST
c4 = genf2(c4, 0xD9, 0xE0); // FCHS
}
else
assert(0);
pop87();
c4 = genf2(c4, 0xD9, 0xC8 + 1); // FXCH ST(1)
c4 = cat(c4, fixresult_complex87(e, retregs, pretregs));
return cat4(c1,c2,c3,c4);
// Comparisons involving a complex operand, or mixing real and imaginary:
// both sides are loaded as complex and compared by CLIBccmp
case X(OPeqeq, TYcfloat, TYcfloat):
case X(OPeqeq, TYcdouble, TYcdouble):
case X(OPeqeq, TYcldouble, TYcldouble):
case X(OPeqeq, TYcfloat, TYifloat):
case X(OPeqeq, TYcdouble, TYidouble):
case X(OPeqeq, TYcldouble, TYildouble):
case X(OPeqeq, TYcfloat, TYfloat):
case X(OPeqeq, TYcdouble, TYdouble):
case X(OPeqeq, TYcldouble, TYldouble):
case X(OPeqeq, TYifloat, TYcfloat):
case X(OPeqeq, TYidouble, TYcdouble):
case X(OPeqeq, TYildouble, TYcldouble):
case X(OPeqeq, TYfloat, TYcfloat):
case X(OPeqeq, TYdouble, TYcdouble):
case X(OPeqeq, TYldouble, TYcldouble):
case X(OPeqeq, TYfloat, TYifloat):
case X(OPeqeq, TYdouble, TYidouble):
case X(OPeqeq, TYldouble, TYildouble):
case X(OPeqeq, TYifloat, TYfloat):
case X(OPeqeq, TYidouble, TYdouble):
case X(OPeqeq, TYildouble, TYldouble):
c1 = loadComplex(e1);
c2 = loadComplex(e2);
c3 = makesure87(e1, sz2, 2, 0);
c3 = cat(c3,makesure87(e1, 0, 3, 0));
retregs = 0;
c4 = callclib(e, CLIBccmp, &retregs, 0);
return cat4(c1,c2,c3,c4);
// real +/- imaginary (either order): no arithmetic instruction is emitted,
// the two loaded values simply become the ST01 result pair (after FCHS for
// subtraction and FXCH when e1 is the imaginary operand)
case X(OPadd, TYfloat, TYifloat):
case X(OPadd, TYdouble, TYidouble):
case X(OPadd, TYldouble, TYildouble):
case X(OPadd, TYifloat, TYfloat):
case X(OPadd, TYidouble, TYdouble):
case X(OPadd, TYildouble, TYldouble):
case X(OPmin, TYfloat, TYifloat):
case X(OPmin, TYdouble, TYidouble):
case X(OPmin, TYldouble, TYildouble):
case X(OPmin, TYifloat, TYfloat):
case X(OPmin, TYidouble, TYdouble):
case X(OPmin, TYildouble, TYldouble):
retregs = mST0;
c1 = codelem(e1, &retregs, FALSE);
note87(e1, 0, 0);
c2 = codelem(e2, &retregs, FALSE);
c3 = makesure87(e1, 0, 1, 0);
if (eoper == OPmin)
c3 = genf2(c3, 0xD9, 0xE0); // FCHS
if (tyimaginary(e1->Ety))
c3 = genf2(c3, 0xD9, 0xC8 + 1); // FXCH ST(1)
retregs = mST01;
c4 = fixresult_complex87(e, retregs, pretregs);
return cat4(c1,c2,c3,c4);
// complex +/- imaginary: e2 is combined by load87 with e1 at offset sz2
case X(OPadd, TYcfloat, TYifloat):
case X(OPadd, TYcdouble, TYidouble):
case X(OPadd, TYcldouble, TYildouble):
op = 0;
goto Lci;
case X(OPmin, TYcfloat, TYifloat):
case X(OPmin, TYcdouble, TYidouble):
case X(OPmin, TYcldouble, TYildouble):
op = 4;
goto Lci;
Lci:
c1 = loadComplex(e1);
retregs = mST0;
c2 = load87(e2,sz2,&retregs,e1,op);
freenode(e2);
retregs = mST01;
c3 = makesure87(e1,0,1,0);
c4 = fixresult_complex87(e, retregs, pretregs);
return cat4(c1,c2,c3,c4);
// Multiplication of a complex value by a real or imaginary value: both
// components are multiplied by the scalar in Lcm2
case X(OPmul, TYcfloat, TYfloat):
case X(OPmul, TYcdouble, TYdouble):
case X(OPmul, TYcldouble, TYldouble):
c1 = loadComplex(e1);
goto Lcm1;
case X(OPmul, TYcfloat, TYifloat):
case X(OPmul, TYcdouble, TYidouble):
case X(OPmul, TYcldouble, TYildouble):
c1 = loadComplex(e1);
c1 = genf2(c1, 0xD9, 0xE0); // FCHS
genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1)
// Multiplying by an e2 for which elemisone() holds needs no FMULs
if (elemisone(e2))
{
freenode(e2);
c2 = NULL;
c3 = NULL;
goto Lcd4;
}
goto Lcm1;
Lcm1:
retregs = mST0;
c2 = codelem(e2, &retregs, FALSE);
c3 = makesure87(e1, sz2, 1, 0);
c3 = cat(c3,makesure87(e1, 0, 2, 0));
goto Lcm2;
case X(OPmul, TYfloat, TYcfloat):
case X(OPmul, TYdouble, TYcdouble):
case X(OPmul, TYldouble, TYcldouble):
retregs = mST0;
c1 = codelem(e1, &retregs, FALSE);
note87(e1, 0, 0);
c2 = loadComplex(e2);
c3 = makesure87(e1, 0, 2, 0);
c3 = genf2(c3,0xD9,0xC8 + 1); // FXCH ST(1)
genf2(c3,0xD9,0xC8 + 2); // FXCH ST(2)
goto Lcm2;
case X(OPmul, TYifloat, TYcfloat):
case X(OPmul, TYidouble, TYcdouble):
case X(OPmul, TYildouble, TYcldouble):
retregs = mST0;
c1 = codelem(e1, &retregs, FALSE);
note87(e1, 0, 0);
c2 = loadComplex(e2);
c3 = makesure87(e1, 0, 2, 0);
c3 = genf2(c3, 0xD9, 0xE0); // FCHS
genf2(c3,0xD9,0xC8 + 2); // FXCH ST(2)
goto Lcm2;
Lcm2:
c3 = genf2(c3,0xDC,0xC8 + 2); // FMUL ST(2), ST
genf2(c3,0xDE,0xC8 + 1); // FMULP ST(1), ST
goto Lcd3;
// Division of a complex value by a real or imaginary value: both components
// are divided by the scalar in Lcd1
case X(OPdiv, TYcfloat, TYfloat):
case X(OPdiv, TYcdouble, TYdouble):
case X(OPdiv, TYcldouble, TYldouble):
c1 = loadComplex(e1);
retregs = mST0;
c2 = codelem(e2, &retregs, FALSE);
c3 = makesure87(e1, sz2, 1, 0);
c3 = cat(c3,makesure87(e1, 0, 2, 0));
goto Lcd1;
case X(OPdiv, TYcfloat, TYifloat):
case X(OPdiv, TYcdouble, TYidouble):
case X(OPdiv, TYcldouble, TYildouble):
c1 = loadComplex(e1);
c1 = genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1)
// keep the recorded stack contents in step with the FXCH just emitted
xchg87(0, 1);
genf2(c1, 0xD9, 0xE0); // FCHS
retregs = mST0;
c2 = codelem(e2, &retregs, FALSE);
c3 = makesure87(e1, 0, 1, 0);
c3 = cat(c3,makesure87(e1, sz2, 2, 0));
Lcd1:
c3 = genf2(c3,0xDC,0xF8 + 2); // FDIV ST(2), ST
genf2(c3,0xDE,0xF8 + 1); // FDIVP ST(1), ST
// Common tails: Lcd3 accounts for the scalar that was popped, Lcd4 returns
// the ST01 pair
Lcd3:
pop87();
Lcd4:
retregs = mST01;
c4 = fixresult_complex87(e, retregs, pretregs);
return cat4(c1, c2, c3, c4);
case X(OPmod, TYcfloat, TYfloat):
case X(OPmod, TYcdouble, TYdouble):
case X(OPmod, TYcldouble, TYldouble):
case X(OPmod, TYcfloat, TYifloat):
case X(OPmod, TYcdouble, TYidouble):
case X(OPmod, TYcldouble, TYildouble):
/*
fld E1.re
fld E1.im
fld E2
fxch ST(1)
FM1: fprem
fstsw word ptr sw
fwait
mov AH, byte ptr sw+1
jp FM1
fxch ST(2)
FM2: fprem
fstsw word ptr sw
fwait
mov AH, byte ptr sw+1
jp FM2
fstp ST(1)
fxch ST(1)
*/
c1 = loadComplex(e1);
retregs = mST0;
c2 = codelem(e2, &retregs, FALSE);
c3 = makesure87(e1, sz2, 1, 0);
c3 = cat(c3,makesure87(e1, 0, 2, 0));
c3 = genf2(c3, 0xD9, 0xC8 + 1); // FXCH ST(1)
// Each FPREM loop is built as its own list so that the JP can target the
// head of that list (the FPREM)
cx = gen2(NULL, 0xD9, 0xF8); // FPREM
cx = cg87_87topsw(cx);
cx = genjmp(cx, JP, FLcode, (block *)cx); // JP FM1
cx = genf2(cx, 0xD9, 0xC8 + 2); // FXCH ST(2)
c3 = cat(c3,cx);
cx = gen2(NULL, 0xD9, 0xF8); // FPREM
cx = cg87_87topsw(cx);
cx = genjmp(cx, JP, FLcode, (block *)cx); // JP FM2
cx = genf2(cx,0xDD,0xD8 + 1); // FSTP ST(1)
cx = genf2(cx, 0xD9, 0xC8 + 1); // FXCH ST(1)
c3 = cat(c3,cx);
goto Lcd3;
default:
#ifdef DEBUG
elem_print(e);
#endif
assert(0);
break;
}
#undef X
// Only the cases that merely selected op reach this point.
e2oper = e2->Eoper;
/* Move double-sized operand into the second position if there's a chance
* it will allow combining a load with an operation (DMD Bugzilla 2905)
*/
if ( ((tybasic(e1->Ety) == TYdouble)
&& ((e1->Eoper == OPvar) || (e1->Eoper == OPconst))
&& (tybasic(e2->Ety) != TYdouble)) ||
(e1->Eoper == OPconst) ||
(e1->Eoper == OPvar &&
((e1->Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) ||
(e2oper == OPd_f &&
(e2->E1->Eoper == OPs32_d || e2->E1->Eoper == OPs64_d || e2->E1->Eoper == OPs16_d) &&
e2->E1->E1->Eoper == OPvar
) ||
((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) &&
e2->E1->Eoper == OPvar
)
)
)
)
{ // Reverse order of evaluation
e1 = e->E2;
e2 = e->E1;
// swap to the reversed form of the operation to compensate
op = oprev[op + 1];
reverse ^= 1;
}
c1 = codelem(e1,&retregs,FALSE);
note87(e1,0,0);
// Division through the CLIBfdiv87 library routine when CFG4fdivcall is set
if (config.flags4 & CFG4fdivcall && e->Eoper == OPdiv)
{
regm_t retregs = mST0;
c2 = load87(e2,0,&retregs,e1,-1);
c2 = cat(c2,makesure87(e1,0,1,0));
if (op == 7) // if reverse divide
c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
c2 = cat(c2,callclib(e,CLIBfdiv87,&retregs,0));
pop87();
resregm = mST0;
freenode(e2);
c4 = fixresult87(e,resregm,pretregs);
}
else if (e->Eoper == OPmod)
{
/*
* fld tbyte ptr y
* fld tbyte ptr x // ST = x, ST1 = y
* FM1: // We don't use fprem1 because for some inexplicable
* // reason we get -5 when we do _modulo(15, 10)
* fprem // ST = ST % ST1
* fstsw word ptr sw
* fwait
* mov AH,byte ptr sw+1 // get msb of status word in AH
* sahf // transfer to flags
* jp FM1 // continue till ST < ST1
* fstp ST(1) // leave remainder on stack
*/
regm_t retregs = mST0;
c2 = load87(e2,0,&retregs,e1,-1);
c2 = cat(c2,makesure87(e1,0,1,0)); // now have x,y on stack; need y,x
if (!reverse) // if not reverse modulo
c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
c3 = gen2(NULL, 0xD9, 0xF8); // FM1: FPREM
c3 = cg87_87topsw(c3);
c3 = genjmp(c3, JP, FLcode, (block *)c3); // JP FM1
c3 = genf2(c3,0xDD,0xD8 + 1); // FSTP ST(1)
pop87();
resregm = mST0;
freenode(e2);
c4 = fixresult87(e,resregm,pretregs);
}
else
// Ordinary case: load87 loads e2, applies op and fixes up the result
{ c2 = load87(e2,0,pretregs,e1,op);
freenode(e2);
}
if (*pretregs & mST0)
note87(e,0,0);
//printf("orth87(-e = %p, *pretregs = x%x)\n", e, *pretregs);
return cat4(c1,c2,c3,c4);
}
/*****************************
 * Load e into ST01.
 *
 * A real operand is followed by an FLDZ, an imaginary operand is preceded
 * by one, and a complex operand is loaded directly as the ST01 pair.
 * Afterwards stack entry 1 is recorded as e at offset 0 and stack entry 0
 * as e at the second offset (half the type size for complex types, the
 * full type size otherwise).
 * Returns:
 *      generated code
 */
code *loadComplex(elem *e)
{
    code *cdb;
    regm_t want;
    int secondOffset = tysize(e->Ety);

    switch (tybasic(e->Ety))
    {
        case TYfloat:
        case TYdouble:
        case TYldouble:
            want = mST0;
            cdb = codelem(e,&want,FALSE);
            // Convert to complex with a 0 for the imaginary part
            cdb = cat(cdb, push87());
            cdb = gen2(cdb,0xD9,0xEE);          // FLDZ
            break;

        case TYifloat:
        case TYidouble:
        case TYildouble:
            // Convert to complex with a 0 for the real part
            cdb = push87();
            cdb = gen2(cdb,0xD9,0xEE);          // FLDZ
            want = mST0;
            cdb = cat(cdb, codelem(e,&want,FALSE));
            break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            secondOffset /= 2;
            want = mST01;
            cdb = codelem(e,&want,FALSE);
            break;

        default:
            assert(0);
    }

    note87(e, 0, 1);
    note87(e, secondOffset, 0);
    return cdb;
}
/*************************
 * If op == -1, load expression e into ST0.
 * else compute (eleft op e), eleft is in ST0.
 * Must follow same logic as cmporder87();
 * Params:
 *      e        = operand to load or combine
 *      eoffset  = offset under which eleft is recorded on the 8087 stack
 *      pretregs = in/out: where the result is wanted
 *      eleft    = left operand already on the stack (not used when op == -1)
 *      op       = -1, or the operation number placed in the reg field of the
 *                 instruction (0 add, 1 mul, 4 sub, 6 div as used by orth87;
 *                 3 is the compare that also pops)
 * Returns:
 *      generated code
 */
code *load87(elem *e,unsigned eoffset,regm_t *pretregs,elem *eleft,int op)
{
code *ccomma,*c,*c2,*cpush;
code cs;
regm_t retregs;
unsigned reg,mf,mf1;
int opr;
unsigned char ldop;
tym_t ty;
int i;
#if NDPP
printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,stackused);
#endif
elem_debug(e);
ccomma = NULL;
cpush = NULL;
if (ADDFWAIT())
cs.Iflags = CFwait;
else
cs.Iflags = 0;
cs.Irex = 0;
// opr is the reversed form of op, used when e ends up in ST0 above eleft
opr = oprev[op + 1];
ty = tybasic(e->Ety);
// A long double operand cannot be combined straight from memory here:
// it is loaded onto the stack first (Ldefault), except for OPd_ld.
if ((ty == TYldouble || ty == TYildouble) &&
op != -1 && e->Eoper != OPd_ld)
goto Ldefault;
// Memory format of e: float or double
mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
L5:
switch (e->Eoper)
{
case OPcomma:
// Evaluate the left sides of the comma chain, then dispatch again on
// what docommas() left in e
ccomma = docommas(&e);
// if (op != -1)
// ccomma = cat(ccomma,makesure87(eleft,eoffset,0,0));
goto L5;
case OPvar:
notreg(e);
// falls through to OPind
case OPind:
L2:
if (op != -1)
{
// If e is a common subexpression still held on the 8087 stack,
// operate against that stack entry; otherwise operate from memory.
if (e->Ecount && e->Ecount != e->Ecomsub &&
(i = cse_get(e, 0)) >= 0)
{ static unsigned char b2[8] = {0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8};
c = genf2(NULL,0xD8,b2[op] + i); // Fop ST(i)
}
else
{
c = getlvalue(&cs,e,0);
if (I64)
cs.Irex &= ~REX_W; // don't use for x87 ops
c = cat(c,makesure87(eleft,eoffset,0,0));
cs.Iop = ESC(mf,0);
cs.Irm |= modregrm(0,op,0);
c = gen(c,&cs);
}
}
else
{
// Plain load of a variable or indirection
cpush = push87();
switch (ty)
{
case TYfloat:
case TYdouble:
case TYifloat:
case TYidouble:
case TYcfloat:
case TYcdouble:
case TYdouble_alias:
c = loadea(e,&cs,ESC(mf,1),0,0,0,0); // FLD var
break;
case TYldouble:
case TYildouble:
case TYcldouble:
c = loadea(e,&cs,0xDB,5,0,0,0); // FLD var
break;
default:
// __debug printf("ty = x%x\n", ty);
assert(0);
break;
}
note87(e,0,0);
}
break;
// Floating point conversions: the operand is loaded (or combined) in its
// own memory format mf1
case OPd_f:
case OPf_d:
case OPd_ld:
mf1 = (tybasic(e->E1->Ety) == TYfloat || tybasic(e->E1->Ety) == TYifloat)
? MFfloat : MFdouble;
if (op != -1 && stackused)
note87(eleft,eoffset,0); // don't trash this value
if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind)
{
#if 1
// L4 is also the target for the integer conversions (L6) when their
// operand is directly addressable
L4:
c = getlvalue(&cs,e->E1,0);
cs.Iop = ESC(mf1,0);
if (ADDFWAIT())
cs.Iflags |= CFwait;
if (!I16)
cs.Iflags &= ~CFopsize;
if (op != -1)
{ cs.Irm |= modregrm(0,op,0);
c = cat(c,makesure87(eleft,eoffset,0,0));
}
else
{ cs.Iop |= 1;
c = cat(c,push87());
}
c = gen(c,&cs); /* FLD / Fop */
#else
c = loadea(e->E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e->E1 */
#endif
/* Variable cannot be put into a register anymore */
if (e->E1->Eoper == OPvar)
notreg(e->E1);
freenode(e->E1);
}
else
{
// Operand must be computed: evaluate it into ST0, then apply the
// reversed operation against eleft in ST1
retregs = mST0;
c = codelem(e->E1,&retregs,FALSE);
if (op != -1)
{ c = cat(c,makesure87(eleft,eoffset,1,0));
c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
pop87();
}
}
break;
// 64 bit integer to floating point: always loaded with FILD m64, from the
// operand itself when addressable, otherwise via floatreg
case OPs64_d:
if (e->E1->Eoper == OPvar ||
(e->E1->Eoper == OPind && e->E1->Ecount == 0))
{
c = getlvalue(&cs,e->E1,0);
cs.Iop = 0xDF;
if (ADDFWAIT())
cs.Iflags |= CFwait;
if (!I16)
cs.Iflags &= ~CFopsize;
c = cat(c,push87());
cs.Irm |= modregrm(0,5,0);
c = gen(c,&cs); // FILD m64
// Variable cannot be put into a register anymore
if (e->E1->Eoper == OPvar)
notreg(e->E1);
freenode(e->E1);
}
else if (I64)
{
retregs = ALLREGS;
c = codelem(e->E1,&retregs,FALSE);
reg = findreg(retregs);
c = genfltreg(c,0x89,reg,0); // MOV floatreg,reg
code_orrex(c, REX_W);
c = cat(c,push87());
c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg
}
else
{
retregs = ALLREGS;
c = codelem(e->E1,&retregs,FALSE);
reg = findreglsw(retregs);
c = genfltreg(c,0x89,reg,0); // MOV floatreg,reglsw
reg = findregmsw(retregs);
c = genfltreg(c,0x89,reg,4); // MOV floatreg+4,regmsw
c = cat(c,push87());
c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg
}
if (op != -1)
{ c = cat(c,makesure87(eleft,eoffset,1,0));
c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
pop87();
}
break;
// Constants: only those with a dedicated load instruction (see loadconst)
// are expected here
case OPconst:
ldop = loadconst(e, 0);
if (ldop)
{
cpush = push87();
c = genf2(NULL,0xD9,ldop); // FLDx
if (op != -1)
{ genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
pop87();
}
}
else
{
assert(0);
}
break;
case OPu16_d:
{
/* This opcode should never be generated */
/* (probably shouldn't be for 16 bit code too) */
assert(!I32);
if (op != -1)
note87(eleft,eoffset,0); // don't trash this value
retregs = ALLREGS & mLSW;
c = codelem(e->E1,&retregs,FALSE);
c = regwithvalue(c,ALLREGS & mMSW,0,&reg,0); // 0-extend
retregs |= mask[reg];
mf1 = MFlong;
goto L3;
}
// Signed 16/32 bit integer to floating point
case OPs16_d: mf1 = MFword; goto L6;
case OPs32_d: mf1 = MFlong; goto L6;
L6:
if (op != -1)
note87(eleft,eoffset,0); // don't trash this value
if (e->E1->Eoper == OPvar ||
(e->E1->Eoper == OPind && e->E1->Ecount == 0))
{
goto L4;
}
else
{
retregs = ALLREGS;
c = codelem(e->E1,&retregs,FALSE);
// Integer is in register(s): store to floatreg, then load or operate
// from there
L3:
if (I16 && e->Eoper != OPs16_d)
{
/* MOV floatreg+2,reg */
reg = findregmsw(retregs);
c = genfltreg(c,0x89,reg,REGSIZE);
retregs &= mLSW;
}
reg = findreg(retregs);
c = genfltreg(c,0x89,reg,0); /* MOV floatreg,reg */
if (op != -1)
{ c = cat(c,makesure87(eleft,eoffset,0,0));
genfltreg(c,ESC(mf1,0),op,0); /* Fop floatreg */
}
else
{
/* FLD long ptr floatreg */
c = cat(c,push87());
c = genfltreg(c,ESC(mf1,1),0,0);
}
}
break;
// Anything else: evaluate e into ST0, then apply the operation with the
// popping register form
default:
Ldefault:
retregs = mST0;
#if 1 /* Do this instead of codelem() to avoid the freenode(e).
We also lose CSE capability */
if (e->Eoper == OPconst)
{
c = load87(e, 0, &retregs, NULL, -1);
}
else
c = (*cdxxx[e->Eoper])(e,&retregs);
#else
c = codelem(e,&retregs,FALSE);
#endif
if (op != -1)
{
c = cat(c,makesure87(eleft,eoffset,1,(op == 0 || op == 1)));
pop87();
// If makesure87 had to end with an FXCH ST(1) to get the operands in
// order, drop the exchange and use the non-reversed sub/div instead.
if (op == 4 || op == 6) // sub or div
{ code *cl;
cl = code_last(c);
if (cl && cl->Iop == 0xD9 && cl->Irm == 0xC9) // FXCH ST(1)
{ cl->Iop = NOP;
opr = op; // reverse operands
}
}
c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
}
break;
}
if (op == 3) // FCOMP
{ pop87(); // extra pop was done
cg87_87topsw(c);
}
// After a compare the result is in the flags, otherwise in ST0
c2 = fixresult87(e,((op == 3) ? mPSW : mST0),pretregs);
#if NDPP
printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,stackused);
#endif
return cat4(ccomma,cpush,c,c2);
}
/********************************
 * Determine if a compare is to be done forwards (return 0)
 * or backwards (return 1).
 * Must follow same logic as load87().
 */
int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);

    // A comma expression is classified by its rightmost operand
    while (e->Eoper == OPcomma)
        e = e->E2;

    switch (e->Eoper)
    {
        case OPd_f:
        case OPf_d:
        case OPd_ld:
            // forwards only when the converted operand is a variable or
            // an indirection
            return (e->E1->Eoper == OPvar || e->E1->Eoper == OPind) ? 0 : 1;

        case OPconst:
            if (loadconst(e, 0))
            {
                //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
                return 1;
            }
            return (tybasic(e->Ety) == TYldouble ||
                    tybasic(e->Ety) == TYildouble) ? 1 : 0;

        case OPvar:
        case OPind:
            // long double operands are compared backwards
            return (tybasic(e->Ety) == TYldouble ||
                    tybasic(e->Ety) == TYildouble) ? 1 : 0;

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            return 0;

        case OPs64_d:
        default:
            return 1;
    }
}
/*******************************
 * Perform an assignment to a long double/double/float.
 *
 * The right side is evaluated into ST0 and stored to the lvalue e->E1.
 * If the value is also wanted afterwards (ST0, general or XMM registers),
 * ST0 is kept; otherwise the store pops it.
 * Params:
 *      e        = the OPeq elem
 *      pretregs = in/out: where the result is wanted
 * Returns:
 *      generated code
 */
code *eq87(elem *e,regm_t *pretregs)
{
regm_t retregs;
code *c1,*c2;
code cs;
unsigned op1;
unsigned op2;
tym_t ty1;
//printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
assert(e->Eoper == OPeq);
retregs = mST0 | (*pretregs & mPSW);
c1 = codelem(e->E2,&retregs,FALSE);
ty1 = tybasic(e->E1->Ety);
// op1 is the store opcode for the destination type, op2 its reg field
// (the popping form by default)
switch (ty1)
{ case TYdouble_alias:
case TYidouble:
case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break;
case TYifloat:
case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break;
case TYildouble:
case TYldouble: op1 = 0xDB; op2 = 7; break;
default:
assert(0);
}
if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
{
// The long double store stays in its popping form (op2 is left at 7),
// so ST0 is duplicated first to keep a copy on the stack
if (ty1 == TYldouble || ty1 == TYildouble)
{
c1 = cat(c1,push87());
c1 = genf2(c1,0xD9,0xC0); // FLD ST(0)
pop87();
}
else
op2 = 2; // FST e->E1
}
else
{ // FSTP e->E1
pop87();
}
#if 0
// Doesn't work if ST(0) gets saved to the stack by getlvalue()
c2 = loadea(e->E1,&cs,op1,op2,0,0,0);
#else
cs.Irex = 0;
cs.Iflags = 0;
cs.Iop = op1;
if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
{ // Make sure it's still there
elem *e2 = e->E2;
while (e2->Eoper == OPcomma)
e2 = e2->E2;
note87(e2,0,0);
c2 = getlvalue(&cs, e->E1, 0);
c2 = cat(c2,makesure87(e2,0,0,1));
}
else
{
c2 = getlvalue(&cs, e->E1, 0);
}
cs.Irm |= modregrm(0,op2,0); // OR in reg field
if (I32)
cs.Iflags &= ~CFopsize;
else if (ADDFWAIT())
cs.Iflags |= CFwait;
else if (I64)
cs.Irex &= ~REX_W;
c2 = gen(c2, &cs);
#if LNGDBLSIZE == 12
if (tysize[TYldouble] == 12)
{
/* This deals with the fact that 10 byte reals really
* occupy 12 bytes by zeroing the extra 2 bytes.
*/
if (op1 == 0xDB)
{
cs.Iop = 0xC7; // MOV EA+10,0
NEWREG(cs.Irm, 0);
cs.IEV1.sp.Voffset += 10;
cs.IFL2 = FLconst;
cs.IEV2.Vint = 0;
cs.Iflags |= CFopsize;
c2 = gen(c2, &cs);
}
}
#endif
if (tysize[TYldouble] == 16)
{
/* This deals with the fact that 10 byte reals really
* occupy 16 bytes by zeroing the extra 6 bytes.
*/
if (op1 == 0xDB)
{
cs.Irex &= ~REX_W;
cs.Iop = 0xC7; // MOV EA+10,0
NEWREG(cs.Irm, 0);
cs.IEV1.sp.Voffset += 10;
cs.IFL2 = FLconst;
cs.IEV2.Vint = 0;
// First store has CFopsize set (2 bytes at offset 10); the second,
// with CFopsize cleared, covers the remaining 4 bytes at offset 12
cs.Iflags |= CFopsize;
c2 = gen(c2, &cs);
cs.IEV1.sp.Voffset += 2;
cs.Iflags &= ~CFopsize;
c2 = gen(c2, &cs);
}
}
#endif
c2 = genfwait(c2);
freenode(e->E1);
c1 = cat3(c1,c2,fixresult87(e,mST0 | mPSW,pretregs));
return c1;
}
/*******************************
 * Perform an assignment to a complex long double/double/float.
 * e must be an OPeq node. e->E2 is evaluated into ST01 and both halves
 * are stored through the lvalue e->E1: first the half at offset sz
 * (half the size of the complex type), then the half at offset 0.
 * Params:
 *      e = the OPeq node
 *      pretregs = registers/flags in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *complex_eq87(elem *e,regm_t *pretregs)
{
regm_t retregs;
code *c1,*c2;
code cs;
unsigned op1;
unsigned op2;
unsigned sz;
tym_t ty1;
int fxch = 0;
//printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
assert(e->Eoper == OPeq);
cs.Iflags = ADDFWAIT() ? CFwait : 0;
cs.Irex = 0;
// Evaluate the rvalue into ST01, asking for flags too if the caller wants them
retregs = mST01 | (*pretregs & mPSW);
c1 = codelem(e->E2,&retregs,FALSE);
ty1 = tybasic(e->E1->Ety);
// op1 is the store opcode, op2 its /reg field (popping form by default)
switch (ty1)
{
case TYcdouble: op1 = ESC(MFdouble,1); op2 = 3; break;
case TYcfloat: op1 = ESC(MFfloat,1); op2 = 3; break;
case TYcldouble: op1 = 0xDB; op2 = 7; break;
default:
assert(0);
}
if (*pretregs & (mST01 | mXMM0 | mXMM1)) // if want result on stack too
{
if (ty1 == TYcldouble)
{
// Keep the popping stores for long double: duplicate both halves
// first so that copies remain after the two stores.
c1 = cat(c1,push87());
c1 = cat(c1,push87());
c1 = genf2(c1,0xD9,0xC0 + 1); // FLD ST(1)
genf2(c1,0xD9,0xC0 + 1); // FLD ST(1)
pop87();
pop87();
}
else
{ op2 = 2; // FST e->E1
// Non-popping stores only reach ST0, so the halves are exchanged
// around the second store (see the FXCH pair below).
fxch = 1;
}
}
else
{ // FSTP e->E1
pop87();
pop87();
}
sz = tysize(ty1) / 2;
if (*pretregs & (mST01 | mXMM0 | mXMM1))
{
cs.Iflags = 0;
cs.Irex = 0;
cs.Iop = op1;
c2 = getlvalue(&cs, e->E1, 0);
cs.IEVoffset1 += sz;
cs.Irm |= modregrm(0, op2, 0);
c2 = cat(c2, makesure87(e->E2, sz, 0, 0));
c2 = gen(c2, &cs);
c2 = genfwait(c2);
c2 = cat(c2, makesure87(e->E2, 0, 1, 0));
}
else
{
c2 = loadea(e->E1,&cs,op1,op2,sz,0,0);
c2 = genfwait(c2);
}
if (fxch)
c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
// Second store: same instruction, now addressing offset 0
cs.IEVoffset1 -= sz;
gen(c2, &cs);
if (fxch)
genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
if (tysize[TYldouble] == 12)
{
// Zero the bytes following each 10 byte value stored above
if (op1 == 0xDB)
{
cs.Iop = 0xC7; // MOV EA+10,0
NEWREG(cs.Irm, 0);
cs.IEV1.sp.Voffset += 10;
cs.IFL2 = FLconst;
cs.IEV2.Vint = 0;
cs.Iflags |= CFopsize;
c2 = gen(c2, &cs);
cs.IEVoffset1 += 12;
c2 = gen(c2, &cs); // MOV EA+22,0
}
}
if (tysize[TYldouble] == 16)
{
// Zero the bytes following each 10 byte value stored above:
// stores at EA+10, EA+12, EA+26 and EA+28
if (op1 == 0xDB)
{
cs.Iop = 0xC7; // MOV EA+10,0
NEWREG(cs.Irm, 0);
cs.IEV1.sp.Voffset += 10;
cs.IFL2 = FLconst;
cs.IEV2.Vint = 0;
cs.Iflags |= CFopsize;
c2 = gen(c2, &cs);
cs.IEV1.sp.Voffset += 2;
cs.Iflags &= ~CFopsize;
c2 = gen(c2, &cs);
cs.IEV1.sp.Voffset += 14;
cs.Iflags |= CFopsize;
c2 = gen(c2, &cs);
cs.IEV1.sp.Voffset += 2;
cs.Iflags &= ~CFopsize;
c2 = gen(c2, &cs);
}
}
c2 = genfwait(c2);
freenode(e->E1);
return cat3(c1,c2,fixresult_complex87(e,mST01 | mPSW,pretregs));
}
/*******************************
 * Assignment combined with a conversion to an integral type,
 * i.e. handle (e1 = (int) e2).
 * The operand of the conversion is evaluated into ST0 and then stored
 * straight into the lvalue with an integer store, with the rounding mode
 * switched to round-to-0 around the store.
 * Params:
 *      e = OPeq node whose E2 is one of OPd_s16, OPd_s32, OPd_u16, OPd_s64
 *      pretregs = must request no result
 * Returns:
 *      the generated code
 */
code *cnvteq87(elem *e,regm_t *pretregs)
{
    assert(e->Eoper == OPeq);
    assert(!*pretregs);

    elem *cvt = e->E2;                  // the conversion node
    elem_debug(cvt);

    regm_t retregs = mST0;
    code *cload = codelem(cvt->E1,&retregs,FALSE);

    unsigned storeop;                   // opcode of the integer store
    unsigned storereg;                  // its /reg field
    switch (cvt->Eoper)
    {
        case OPd_s16:
            storeop  = ESC(MFword,1);
            storereg = 3;
            break;

        case OPd_s32:
        case OPd_u16:
            storeop  = ESC(MFlong,1);
            storereg = 3;
            break;

        case OPd_s64:
            storeop  = 0xDF;
            storereg = 7;
            break;

        default:
            assert(0);
    }
    freenode(cvt);

    cload = genfwait(cload);
    cload = genrnd(cload, CW_roundto0);         // FLDCW roundto0

    pop87();
    code cs;
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    if (e->E1->Eoper == OPvar)
        notreg(e->E1);                  // cannot be put in register anymore

    code *cstore = loadea(e->E1,&cs,storeop,storereg,0,0,0);
    cstore = genfwait(cstore);
    cstore = genrnd(cstore, CW_roundtonearest); // FLDCW roundtonearest

    freenode(e->E1);
    return cat(cload,cstore);
}
/**********************************
 * Perform +=, -=, *=, /= and %= for float/double/long double lvalues.
 * OPpostinc and OPpostdec are accepted too and use the add/subtract
 * opcodes. Complex lvalues are forwarded to opmod_complex87() (for %=)
 * or opass_complex87() (everything else).
 * Params:
 *      e = the op-assign node; e->E1 is the lvalue, e->E2 the rvalue
 *      pretregs = registers/flags in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *opass87(elem *e,regm_t *pretregs)
{
regm_t retregs;
code *cl,*cr,*c;
code cs;
unsigned op;
unsigned opld;
unsigned op1;
unsigned op2;
tym_t ty1;
ty1 = tybasic(e->E1->Ety);
// op1/op2: opcode and /reg field used for the final store to e->E1
switch (ty1)
{ case TYdouble_alias:
case TYidouble:
case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break;
case TYifloat:
case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break;
case TYildouble:
case TYldouble: op1 = 0xDB; op2 = 7; break;
case TYcfloat:
case TYcdouble:
case TYcldouble:
return (e->Eoper == OPmodass)
? opmod_complex87(e, pretregs)
: opass_complex87(e, pretregs);
default:
assert(0);
}
// op: /reg field (already shifted) for the memory-operand form;
// opld: second opcode byte of the popping register form used for long double.
// Neither is set for OPmodass, which has its own code path below.
switch (e->Eoper)
{ case OPpostinc:
case OPaddass: op = 0 << 3; opld = 0xC1; break; // FADD
case OPpostdec:
case OPminass: op = 5 << 3; opld = 0xE1; /*0xE9;*/ break; // FSUBR
case OPmulass: op = 1 << 3; opld = 0xC9; break; // FMUL
case OPdivass: op = 7 << 3; opld = 0xF1; break; // FDIVR
case OPmodass: break;
default: assert(0);
}
retregs = mST0;
cr = codelem(e->E2,&retregs,FALSE); // evaluate rvalue
note87(e->E2,0,0);
cl = getlvalue(&cs,e->E1,e->Eoper==OPmodass?mAX:0);
cl = cat(cl,makesure87(e->E2,0,0,0));
cs.Iflags |= ADDFWAIT() ? CFwait : 0;
if (I32)
cs.Iflags &= ~CFopsize;
if (config.flags4 & CFG4fdivcall && e->Eoper == OPdivass)
{
// Division is done by the CLIBfdiv87 library routine:
// load the lvalue, swap the operands, then call it.
c = push87();
cs.Iop = op1;
if (ty1 == TYldouble || ty1 == TYildouble)
cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
c = gen(c,&cs);
c = genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
c = cat(c,callclib(e,CLIBfdiv87,&retregs,0));
pop87();
}
else if (e->Eoper == OPmodass)
{
/*
* fld tbyte ptr y
* fld tbyte ptr x // ST = x, ST1 = y
* FM1: // We don't use fprem1 because for some inexplicable
* // reason we get -5 when we do _modulo(15, 10)
* fprem // ST = ST % ST1
* fstsw word ptr sw
* fwait
* mov AH,byte ptr sw+1 // get msb of status word in AH
* sahf // transfer to flags
* jp FM1 // continue till ST < ST1
* fstp ST(1) // leave remainder on stack
*/
code *c1;
c = push87();
cs.Iop = op1;
if (ty1 == TYldouble || ty1 == TYildouble)
cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1
c1 = gen2(NULL, 0xD9, 0xF8); // FPREM
c1 = cg87_87topsw(c1);
// The jump targets c1, the start of the FPREM sequence
c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM1
c1 = genf2(c1,0xDD,0xD8 + 1); // FSTP ST(1)
c = cat(c,c1);
pop87();
}
else if (ty1 == TYldouble || ty1 == TYildouble)
{
// Long double: load the lvalue, then use the popping register form
c = push87();
cs.Iop = op1;
cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1
genf2(c,0xDE,opld); // FopP ST(1)
pop87();
}
else
{ cs.Iop = op1 & ~1;
cs.Irm |= op;
c = gen(CNIL,&cs); // Fop e->E1
}
if (*pretregs & mPSW)
genftst(c,e,0); // FTST ST0
/* if want result in registers */
if (*pretregs & (mST0 | ALLREGS | mBP))
{
if (ty1 == TYldouble || ty1 == TYildouble)
{
// Keep the popping store for long double: duplicate ST0 first
c = cat(c,push87());
c = genf2(c,0xD9,0xC0); // FLD ST(0)
pop87();
}
else
op2 = 2; // FST e->E1
}
else
{ // FSTP
pop87();
}
cs.Iop = op1;
NEWREG(cs.Irm,op2); // FSTx e->E1
freenode(e->E1);
gen(c,&cs);
genfwait(c);
return cat4(cr,cl,c,fixresult87(e,mST0 | mPSW,pretregs));
}
/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 * The remainder is taken separately for the part of E1 at offset 0 and
 * the part at offset sz2 (half the size of the complex type), each
 * against the single value produced by E2.
 * Params:
 *      e = the OPmodass node
 *      pretregs = registers/flags in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *opmod_complex87(elem *e,regm_t *pretregs)
{
regm_t retregs;
code *cl,*cr,*c;
code cs;
tym_t ty1;
unsigned sz2;
/* fld E2
fld E1.re
FM1: fprem
fstsw word ptr sw
fwait
mov AH, byte ptr sw+1
jp FM1
fxch ST(1)
fld E1.im
FM2: fprem
fstsw word ptr sw
fwait
mov AH, byte ptr sw+1
jp FM2
fstp ST(1)
*/
ty1 = tybasic(e->E1->Ety);
sz2 = tysize[ty1] / 2;
retregs = mST0;
cr = codelem(e->E2,&retregs,FALSE); // FLD E2
note87(e->E2,0,0);
cl = getlvalue(&cs,e->E1,0);
cl = cat(cl,makesure87(e->E2,0,0,0));
cs.Iflags |= ADDFWAIT() ? CFwait : 0;
if (!I16)
cs.Iflags &= ~CFopsize;
c = push87();
// Load opcode for one component of the lvalue
switch (ty1)
{
case TYcdouble: cs.Iop = ESC(MFdouble,1); break;
case TYcfloat: cs.Iop = ESC(MFfloat,1); break;
case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
default:
assert(0);
}
c = gen(c,&cs); // FLD E1.re
code *c1;
c1 = gen2(NULL, 0xD9, 0xF8); // FPREM
c1 = cg87_87topsw(c1);
// The jump targets c1, the start of this FPREM sequence
c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM1
c1 = genf2(c1, 0xD9, 0xC8 + 1); // FXCH ST(1)
c = cat(c,c1);
c = cat(c, push87());
cs.IEVoffset1 += sz2;
gen(c, &cs); // FLD E1.im
c1 = gen2(NULL, 0xD9, 0xF8); // FPREM
c1 = cg87_87topsw(c1);
c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM2
c1 = genf2(c1,0xDD,0xD8 + 1); // FSTP ST(1)
c = cat(c,c1);
pop87();
if (*pretregs & (mST01 | mPSW))
{
// NOTE(review): both stores below are emitted back to back with no
// FXCH between them, unlike the FST/FXCH/FST/FXCH sequence used in
// opass_complex87(). Also, for TYcldouble the /reg field already holds
// 5, so OR-ing in 2 yields 7. Confirm this path produces the intended
// stores and stack state.
cs.Irm |= modregrm(0, 2, 0);
gen(c, &cs); // FST mreal.im
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FST mreal.re
retregs = mST01;
}
else
{
cs.Irm |= modregrm(0, 3, 0);
gen(c, &cs); // FSTP mreal.im
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FSTP mreal.re
pop87();
pop87();
retregs = 0;
}
freenode(e->E1);
genfwait(c);
return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));
}
/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 * OPpostinc and OPpostdec are accepted as well and share the add/subtract
 * code. Three strategies are used:
 *  - add/subtract, and multiply/divide by a non-complex E2, are done
 *    component by component (label L1);
 *  - multiply by a complex E2 calls the CLIBcmul library routine;
 *  - divide by a complex E2 calls the CLIBcdiv library routine.
 * Label L2 is the shared "store a complex long double result" tail and
 * label L3 the shared exit.
 * Params:
 *      e = the op-assign node; e->E1 is the lvalue, e->E2 the rvalue
 *      pretregs = registers/flags in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *opass_complex87(elem *e,regm_t *pretregs)
{
regm_t retregs;
regm_t idxregs;
code *cl,*cr,*c;
code cs;
unsigned op;
unsigned op2;
tym_t ty1;
unsigned sz2;
ty1 = tybasic(e->E1->Ety);
sz2 = tysize[ty1] / 2;
// op: /reg field (already shifted) for the memory-operand form;
// op2: base of the second opcode byte for the popping register form
switch (e->Eoper)
{ case OPpostinc:
case OPaddass: op = 0 << 3; // FADD
op2 = 0xC0; // FADDP ST(i),ST
break;
case OPpostdec:
case OPminass: op = 5 << 3; // FSUBR
op2 = 0xE0; // FSUBRP ST(i),ST
break;
case OPmulass: op = 1 << 3; // FMUL
op2 = 0xC8; // FMULP ST(i),ST
break;
case OPdivass: op = 7 << 3; // FDIVR
op2 = 0xF0; // FDIVRP ST(i),ST
break;
default: assert(0);
}
if (!tycomplex(e->E2->Ety) &&
(e->Eoper == OPmulass || e->Eoper == OPdivass))
{
// Multiply/divide by a real or imaginary value: load it, duplicate it
// so there is one copy per component, and reuse the component-wise
// code at L1. Note this path jumps past the Iflags setup below.
retregs = mST0;
cr = codelem(e->E2, &retregs, FALSE);
note87(e->E2, 0, 0);
cl = getlvalue(&cs, e->E1, 0);
cl = cat(cl,makesure87(e->E2,0,0,0));
cl = cat(cl,push87());
cl = genf2(cl,0xD9,0xC0); // FLD ST(0)
goto L1;
}
else
{
cr = loadComplex(e->E2);
cl = getlvalue(&cs,e->E1,0);
cl = cat(cl,makesure87(e->E2,sz2,0,0));
cl = cat(cl,makesure87(e->E2,0,1,0));
}
cs.Iflags |= ADDFWAIT() ? CFwait : 0;
if (!I16)
cs.Iflags &= ~CFopsize;
switch (e->Eoper)
{
case OPpostinc:
case OPaddass:
case OPpostdec:
case OPminass:
L1:
if (ty1 == TYcldouble)
{
// Complex long double: load both components of the lvalue and
// combine them with the two rvalue entries using the popping form.
c = push87();
c = cat(c, push87());
cs.Iop = 0xDB;
cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1.re
cs.IEVoffset1 += sz2;
gen(c,&cs); // FLD e->E1.im
genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST
genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST
pop87();
pop87();
if (tyimaginary(e->E2->Ety))
{
// Multiplying/dividing by an imaginary value: negate one
// component and exchange the two
if (e->Eoper == OPmulass)
{
genf2(c, 0xD9, 0xE0); // FCHS
genf2(c, 0xD9, 0xC8+1); // FXCH ST(1)
}
else if (e->Eoper == OPdivass)
{
genf2(c, 0xD9, 0xC8+1); // FXCH ST(1)
genf2(c, 0xD9, 0xE0); // FCHS
}
}
L2:
// Shared tail: store a complex long double result back to e->E1.
// Also reached by goto from the OPmulass and OPdivass cases.
if (*pretregs & (mST01 | mPSW))
{
// Duplicate both entries so copies remain after the popping stores
c = cat(c,push87());
c = cat(c,push87());
c = genf2(c,0xD9,0xC1); // FLD ST(1)
c = genf2(c,0xD9,0xC1); // FLD ST(1)
retregs = mST01;
}
else
retregs = 0;
cs.Iop = 0xDB;
cs.Irm |= modregrm(0,7,0);
gen(c,&cs); // FSTP e->E1.im
cs.IEVoffset1 -= sz2;
gen(c,&cs); // FSTP e->E1.re
pop87();
pop87();
}
else
{ // Complex float/double: operate directly against the memory operand.
// Precompute the mod/rm bytes and opcodes for operate, FST and FSTP.
unsigned char rmop = cs.Irm | op;
unsigned char rmfst = cs.Irm | modregrm(0,2,0);
unsigned char rmfstp = cs.Irm | modregrm(0,3,0);
unsigned char iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
unsigned char iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;
cs.Iop = iop;
cs.Irm = rmop;
cs.IEVoffset1 += sz2;
c = gen(NULL, &cs); // FSUBR mreal.im
if (tyimaginary(e->E2->Ety) && (e->Eoper == OPmulass || e->Eoper == OPdivass))
{
if (e->Eoper == OPmulass)
genf2(c, 0xD9, 0xE0); // FCHS
genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FMUL mreal.re
if (e->Eoper == OPdivass)
genf2(c, 0xD9, 0xE0); // FCHS
if (*pretregs & (mST01 | mPSW))
{
cs.Iop = iopfst;
cs.Irm = rmfst;
cs.IEVoffset1 += sz2;
gen(c, &cs); // FST mreal.im
genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FST mreal.re
genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
retregs = mST01;
}
else
{
cs.Iop = iopfst;
cs.Irm = rmfstp;
cs.IEVoffset1 += sz2;
gen(c, &cs); // FSTP mreal.im
pop87();
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FSTP mreal.re
pop87();
retregs = 0;
}
goto L3;
}
if (*pretregs & (mST01 | mPSW))
{
cs.Iop = iopfst;
cs.Irm = rmfst;
gen(c, &cs); // FST mreal.im
genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
cs.Iop = iop;
cs.Irm = rmop;
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FSUBR mreal.re
cs.Iop = iopfst;
cs.Irm = rmfst;
gen(c, &cs); // FST mreal.re
genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
retregs = mST01;
}
else
{
cs.Iop = iopfst;
cs.Irm = rmfstp;
gen(c, &cs); // FSTP mreal.im
pop87();
cs.Iop = iop;
cs.Irm = rmop;
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FSUBR mreal.re
cs.Iop = iopfst;
cs.Irm = rmfstp;
gen(c, &cs); // FSTP mreal.re
pop87();
retregs = 0;
}
}
L3:
// Shared exit for every path
freenode(e->E1);
genfwait(c);
return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));
case OPmulass:
// Complex * complex: load the lvalue and call the CLIBcmul routine
c = push87();
c = cat(c, push87());
if (ty1 == TYcldouble)
{
cs.Iop = 0xDB;
cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1.re
cs.IEVoffset1 += sz2;
gen(c,&cs); // FLD e->E1.im
retregs = mST01;
c = cat(c,callclib(e, CLIBcmul, &retregs, 0));
goto L2;
}
else
{
cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1.re
cs.IEVoffset1 += sz2;
gen(c,&cs); // FLD e->E1.im
retregs = mST01;
c = cat(c,callclib(e, CLIBcmul, &retregs, 0));
if (*pretregs & (mST01 | mPSW))
{
// NOTE(review): the two stores are emitted back to back with no
// FXCH between them, unlike the FST/FXCH/FST/FXCH sequence used
// above. Confirm the store at offset 0 receives the intended value.
cs.Irm |= modregrm(0, 2, 0);
gen(c, &cs); // FST mreal.im
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FST mreal.re
retregs = mST01;
}
else
{
cs.Irm |= modregrm(0, 3, 0);
gen(c, &cs); // FSTP mreal.im
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FSTP mreal.re
pop87();
pop87();
retregs = 0;
}
goto L3;
}
case OPdivass:
// Complex / complex: load the lvalue beneath the divisor (hence the
// FXCH ST(2) after each load) and call the CLIBcdiv routine
c = push87();
c = cat(c, push87());
idxregs = idxregm(&cs); // mask of index regs used
if (ty1 == TYcldouble)
{
cs.Iop = 0xDB;
cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1.re
genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
cs.IEVoffset1 += sz2;
gen(c,&cs); // FLD e->E1.im
genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
retregs = mST01;
c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs));
goto L2;
}
else
{
cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ...
c = gen(c,&cs); // FLD e->E1.re
genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
cs.IEVoffset1 += sz2;
gen(c,&cs); // FLD e->E1.im
genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
retregs = mST01;
c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs));
if (*pretregs & (mST01 | mPSW))
{
// NOTE(review): same back-to-back FST pattern without FXCH as in
// the OPmulass case above; confirm.
cs.Irm |= modregrm(0, 2, 0);
gen(c, &cs); // FST mreal.im
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FST mreal.re
retregs = mST01;
}
else
{
cs.Irm |= modregrm(0, 3, 0);
gen(c, &cs); // FSTP mreal.im
cs.IEVoffset1 -= sz2;
gen(c, &cs); // FSTP mreal.re
pop87();
pop87();
retregs = 0;
}
goto L3;
}
default:
assert(0);
}
return NULL;
}
/**************************
 * OPnegass: negate a floating point lvalue in place.
 * The sign is flipped by XORing 0x80 into the byte that holds the sign
 * bit (for complex lvalues once per component). If the caller wants a
 * result, the negated value is then loaded from the lvalue onto the x87
 * stack.
 * Params:
 *      e = the OPnegass node; e->E1 is the lvalue
 *      pretregs = registers/flags in which the caller wants the result,
 *                 0 if no result is wanted
 * Returns:
 *      the generated code
 */
code *cdnegass87(elem *e,regm_t *pretregs)
{   regm_t retregs;
    tym_t tyml;
    unsigned op;
    code *cl,*cr,*c,cs;
    elem *e1;
    int sz;
    int signoff;        // displacement from the operand start to its sign byte

    //printf("cdnegass87(e = %p, *pretregs = x%x)\n", e, *pretregs);
    e1 = e->E1;
    tyml = tybasic(e1->Ety);            // type of lvalue
    sz = tysize[tyml];

    cl = getlvalue(&cs,e1,0);
    cr = modEA(&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.Irex = 0;

    // Locate the byte holding the sign bit. Long doubles are 10 significant
    // bytes followed by padding, so the sign is in byte 9, not in the last
    // byte of the storage.
#if LNGDBLSIZE > 10
    if (tyml == TYldouble || tyml == TYildouble)
        signoff = 10 - 1;
    else if (tyml == TYcldouble)
        signoff = tysize[TYldouble] + 10 - 1;
    else
#endif
        signoff = sz - 1;
    cs.IEVoffset1 += signoff;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    c = gen(NULL,&cs);                  // XOR 7[EA],0x80
    if (tycomplex(tyml))
    {
        // Second component: same byte, half the type size lower
        cs.IEVoffset1 -= sz / 2;
        gen(c,&cs);                     // XOR 7[EA],0x80
    }
    c = cat3(cl,cr,c);

    if (*pretregs)
    {
        switch (tyml)
        {
            case TYifloat:
            case TYfloat:               cs.Iop = 0xD9;  op = 0; break;
            case TYidouble:
            case TYdouble:
            case TYdouble_alias:        cs.Iop = 0xDD;  op = 0; break;
            case TYildouble:
            case TYldouble:             cs.Iop = 0xDB;  op = 5; break;
            default:
                assert(0);
        }
        NEWREG(cs.Irm,op);
        // Rewind to the start of the operand by exactly the displacement
        // that was applied above. Rewinding by sz - 1 is wrong for padded
        // long doubles, where only 9 was added.
        cs.IEVoffset1 -= signoff;
        c = cat(c, push87());
        c = gen(c,&cs);                 // FLD EA
        retregs = mST0;
    }
    else
        retregs = 0;

    freenode(e1);
    return cat(c,fixresult87(e,retregs,pretregs));
}
/************************
 * Take care of OPpostinc and OPpostdec.
 * The current value of the lvalue e->E1 is loaded first (that is what
 * becomes the expression's result), then e->E2 is added or subtracted
 * and the updated value is stored back to e->E1.
 * Params:
 *      e = the OPpostinc/OPpostdec node
 *      pretregs = registers/flags in which the caller wants the result;
 *                 must not be 0
 * Returns:
 *      the generated code
 */
code *post87(elem *e,regm_t *pretregs)
{
regm_t retregs;
code *cl,*cr,*c;
code cs;
unsigned op;
unsigned op1;
unsigned reg;
tym_t ty1;
//printf("post87()\n");
assert(*pretregs);
cl = getlvalue(&cs,e->E1,0);
cs.Iflags |= ADDFWAIT() ? CFwait : 0;
if (!I16)
cs.Iflags &= ~CFopsize;
ty1 = tybasic(e->E1->Ety);
// op1: opcode shared by the load and the store; reg: /reg field of the load
switch (ty1)
{ case TYdouble_alias:
case TYidouble:
case TYdouble:
case TYcdouble: op1 = ESC(MFdouble,1); reg = 0; break;
case TYifloat:
case TYfloat:
case TYcfloat: op1 = ESC(MFfloat,1); reg = 0; break;
case TYildouble:
case TYldouble:
case TYcldouble: op1 = 0xDB; reg = 5; break;
default:
assert(0);
}
NEWREG(cs.Irm, reg);
// From here on reg is the /reg field of the matching popping store
if (reg == 5)
reg = 7;
else
reg = 3;
cs.Iop = op1;
cl = cat(cl,push87());
cl = gen(cl,&cs); // FLD e->E1
if (tycomplex(ty1))
{ unsigned sz = tysize[ty1] / 2;
// Complex lvalue: also load the component at offset sz, then update
// only the component at offset 0.
cl = cat(cl,push87());
cs.IEVoffset1 += sz;
cl = gen(cl,&cs); // FLD e->E1
retregs = mST0; // note kludge to only load real part
cr = codelem(e->E2,&retregs,FALSE); // load rvalue
c = genf2(NULL,0xD8, // FADD/FSUBR ST,ST2
(e->Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
NEWREG(cs.Irm,reg);
pop87();
cs.IEVoffset1 -= sz;
gen(c,&cs); // FSTP e->E1
genfwait(c);
freenode(e->E1);
return cat4(cl, cr, c, fixresult_complex87(e, mST01, pretregs));
}
if (*pretregs & (mST0 | ALLREGS | mBP))
{ // Want the result in a register
// Duplicate the old value: one copy is consumed by the update below
cl = cat(cl,push87());
genf2(cl,0xD9,0xC0); // FLD ST0
}
if (*pretregs & mPSW) /* if result in flags */
genftst(cl,e,0); // FTST ST0
retregs = mST0;
cr = codelem(e->E2,&retregs,FALSE); /* load rvalue */
pop87();
op = (e->Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
c = genf2(NULL,0xDE,op); // FADDP/FSUBRP ST1
NEWREG(cs.Irm,reg);
pop87();
gen(c,&cs); /* FSTP e->E1 */
genfwait(c);
freenode(e->E1);
return cat4(cl,cr,c,fixresult87(e,mPSW | mST0,pretregs));
}
/************************
 * Do the following opcodes:
 * OPd_s16
 * OPd_s32
 * OPd_u16
 * OPd_s64
 * i.e. convert the floating point operand e->E1 to an integer.
 * For 16 bit code a library routine is called. Otherwise the value is
 * stored with an integer store into space made on the CPU stack, with
 * the control word switched to round-to-0 around the store, and then
 * popped into the result register(s).
 * Params:
 *      e = the conversion node
 *      pretregs = registers in which the caller wants the result;
 *                 must not be 0
 * Returns:
 *      the generated code
 */
code *cnvt87(elem *e,regm_t *pretregs)
{
regm_t retregs;
code *c1,*c2;
unsigned mf,rf,reg;
tym_t tym;
int clib;
int sz;
int szoff;
//printf("cnvt87(e = %p, *pretregs = x%x)\n", e, *pretregs);
assert(*pretregs);
tym = e->Ety;
sz = tysize(tym);
// szoff: offset from the stack pointer at which the control word is saved
szoff = sz;
unsigned grex = I64 ? REX_W << 16 : 0;
// clib: library routine for the I16 path; mf/rf: opcode and /reg field of
// the integer store used by the inline paths
switch (e->Eoper)
{ case OPd_s16:
clib = CLIBdblint87;
mf = ESC(MFword,1);
rf = 3;
break;
case OPd_u16:
szoff = 4;
// falls through: OPd_u16 uses the same 32 bit store as OPd_s32
case OPd_s32:
clib = CLIBdbllng87;
mf = ESC(MFlong,1);
rf = 3;
break;
case OPd_s64:
clib = CLIBdblllng;
mf = 0xDF;
rf = 7;
break;
default:
assert(0);
}
if (I16) // C may change the default control word
{
if (clib == CLIBdblllng)
{ retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
c1 = codelem(e->E1,&retregs,FALSE);
c2 = callclib(e,clib,pretregs,0);
}
else
{ retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
c1 = codelem(e->E1,&retregs,FALSE);
c2 = callclib(e,clib,pretregs,0);
pop87();
}
}
else if (1)
{ // Generate:
// sub ESP,12
// fstcw 8[ESP]
// fldcw roundto0
// fistp long64 ptr [ESP]
// fldcw 8[ESP]
// pop lsw
// pop msw
// add ESP,4
// Space needed: szoff bytes below the saved control word plus 2 bytes
// for the control word itself, plus (PIC only) 2 bytes for the
// round-to-0 control word; rounded up to a multiple of REGSIZE.
unsigned szpush = szoff + 2;
if (config.flags3 & CFG3pic)
szpush += 2;
szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);
retregs = mST0;
c1 = codelem(e->E1,&retregs,FALSE);
if (szpush == REGSIZE)
c1 = gen1(c1,0x50 + AX); // PUSH EAX
else
c1 = genc2(c1,0x81,grex | modregrm(3,5,SP), szpush); // SUB ESP,12
c1 = genfwait(c1);
genc1(c1,0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]
c1 = genfwait(c1);
if (config.flags3 & CFG3pic)
{
// PIC: build the round-to-0 control word on the stack instead of
// going through genrnd()
genc(c1,0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
code_orflag(c1, CFopsize);
genc1(c1,0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
}
else
c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0
pop87();
c1 = genfwait(c1);
gen2sib(c1,mf,grex | modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP]
retregs = *pretregs & (ALLREGS | mBP);
if (!retregs)
retregs = ALLREGS;
c2 = allocreg(&retregs,&reg,tym);
c2 = genfwait(c2); // FWAIT
c2 = genc1(c2,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]
// Pop the result; szpush tracks how much stack is still to be released
if (szoff > REGSIZE)
{ szpush -= REGSIZE;
c2 = genpop(c2,findreglsw(retregs)); // POP lsw
}
szpush -= REGSIZE;
c2 = genpop(c2,reg); // POP reg
if (szpush)
genc2(c2,0x81,grex | modregrm(3,0,SP), szpush); // ADD ESP,4
c2 = cat(c2,fixresult(e,retregs,pretregs));
}
else
{
// Never taken: the branch above is guarded by `else if (1)`.
// This is incorrect. For -inf and nan, the 8087 returns the largest
// negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
// and for nan, 0 should be returned.
retregs = mST0;
c1 = codelem(e->E1,&retregs,FALSE);
c1 = genfwait(c1);
c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0
pop87();
c1 = genfltreg(c1,mf,rf,0); // FISTP floatreg
retregs = *pretregs & (ALLREGS | mBP);
if (!retregs)
retregs = ALLREGS;
c2 = allocreg(&retregs,&reg,tym);
c2 = genfwait(c2);
if (sz > REGSIZE)
{ c2 = genfltreg(c2,0x8B,reg,REGSIZE); // MOV reg,floatreg + REGSIZE
// MOV lsreg,floatreg
genfltreg(c2,0x8B,findreglsw(retregs),0);
}
else
c2 = genfltreg(c2,0x8B,reg,0); // MOV reg,floatreg
c2 = genrnd(c2, CW_roundtonearest); // FLDCW roundtonearest
c2 = cat(c2,fixresult(e,retregs,pretregs));
}
return cat(c1,c2);
}
/************************
 * Do OPrndtol: store the floating point operand as an integer of the
 * result type's size (2, 4 or 8 bytes) via the floatreg scratch slot and
 * pick it up in general purpose register(s).
 * Params:
 *      e = the OPrndtol node
 *      pretregs = registers in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *cdrndtol(elem *e,regm_t *pretregs)
{
    // No result requested: only the operand is evaluated.
    if (*pretregs == 0)
        return codelem(e->E1,pretregs,FALSE);

    tym_t tym = e->Ety;
    regm_t retregs = mST0;
    code *cstore = codelem(e->E1,&retregs,FALSE);

    // Select the integer store matching the size of the result type.
    unsigned char fistop;               // opcode byte
    unsigned char fistreg;              // /reg field
    unsigned sz = tysize(tym);
    switch (sz)
    {
        case 2:
            fistop  = 0xDF;
            fistreg = 3;
            break;

        case 4:
            fistop  = 0xDB;
            fistreg = 3;
            break;

        case 8:
            fistop  = 0xDF;
            fistreg = 7;
            break;

        default:
            assert(0);
    }

    pop87();
    cstore = genfltreg(cstore,fistop,fistreg,0);        // FISTP floatreg

    retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;

    unsigned reg;
    code *cfetch = allocreg(&retregs,&reg,tym);
    cfetch = genfwait(cfetch);                          // FWAIT
    if (tysize(tym) > REGSIZE)
    {
        // Result spans a register pair
        cfetch = genfltreg(cfetch,0x8B,reg,REGSIZE);    // MOV reg,floatreg + REGSIZE
        genfltreg(cfetch,0x8B,findreglsw(retregs),0);   // MOV lsreg,floatreg
    }
    else
    {
        cfetch = genfltreg(cfetch,0x8B,reg,0);          // MOV reg,floatreg
        if (tysize(tym) == 8 && I64)
            code_orrex(cfetch, REX_W);
    }
    cfetch = cat(cfetch,fixresult(e,retregs,pretregs));

    return cat(cstore,cfetch);
}
/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
 * Both operands are evaluated onto the x87 stack and combined with the
 * matching two-operand x87 instruction.
 * Params:
 *      e = the binary node
 *      pretregs = registers in which the caller wants the result;
 *                 must not be 0
 * Returns:
 *      the generated code
 */
code *cdscale(elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    regm_t retregs = mST0;
    code *cleft = codelem(e->E1,&retregs,FALSE);
    note87(e->E1,0,0);

    code *cright = codelem(e->E2,&retregs,FALSE);
    // now have x,y on stack; need y,x
    cright = cat(cright,makesure87(e->E1,0,1,0));

    if (e->Eoper == OPscale)
    {
        cright = genf2(cright,0xD9,0xFD);       // FSCALE
        genf2(cright,0xDD,0xD8 + 1);            // FSTP ST(1)
    }
    else if (e->Eoper == OPyl2x)
    {
        cright = genf2(cright,0xD9,0xF1);       // FYL2X
    }
    else if (e->Eoper == OPyl2xp1)
    {
        cright = genf2(cright,0xD9,0xF9);       // FYL2XP1
    }

    pop87();
    code *cfix = fixresult87(e,mST0,pretregs);
    return cat3(cleft,cright,cfix);
}
/**********************************
 * Unary operators that map onto a single x87 instruction working on ST0:
 * negate, absolute value, square root, sine, cosine and round to integer.
 * Params:
 *      e = the unary node
 *      pretregs = registers in which the caller wants the result;
 *                 must not be 0
 * Returns:
 *      the generated code
 */
code *neg87(elem *e,regm_t *pretregs)
{
    int op;                             // second opcode byte, after 0xD9

    assert(*pretregs);
    switch (e->Eoper)
    {
        case OPneg:     op = 0xE0;  break;      // FCHS
        case OPabs:     op = 0xE1;  break;      // FABS
        case OPsqrt:    op = 0xFA;  break;      // FSQRT
        case OPsin:     op = 0xFE;  break;      // FSIN
        case OPcos:     op = 0xFF;  break;      // FCOS
        case OPrint:    op = 0xFC;  break;      // FRNDINT
        default:
            assert(0);
    }

    regm_t retregs = mST0;
    code *cop = codelem(e->E1,&retregs,FALSE);
    cop = genf2(cop,0xD9,op);
    code *cfix = fixresult87(e,mST0,pretregs);
    return cat(cop,cfix);
}
/**********************************
 * Unary - for complex operands.
 * The operand is evaluated into ST01 and each of the two stack entries
 * is negated in turn, exchanging them after each negation so both end up
 * back in their original positions.
 * Params:
 *      e = the OPneg node
 *      pretregs = registers in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *neg_complex87(elem *e,regm_t *pretregs)
{
    assert(e->Eoper == OPneg);

    regm_t retregs = mST01;
    code *cneg = codelem(e->E1,&retregs,FALSE);

    int part;
    for (part = 0; part < 2; part++)
    {
        cneg = genf2(cneg,0xD9,0xE0);           // FCHS
        cneg = genf2(cneg,0xD9,0xC8 + 1);       // FXCH ST(1)
    }

    code *cfix = fixresult_complex87(e,mST01,pretregs);
    return cat(cneg,cfix);
}
/*********************************
 * Load a float/double/long double through the addressing mode of e
 * onto the x87 stack. The addressing mode is always computed; the load
 * itself is only generated when a result is requested.
 * Params:
 *      e = the expression to load
 *      pretregs = registers in which the caller wants the result,
 *                 0 if none
 * Returns:
 *      the generated code
 */
code *cdind87(elem *e,regm_t *pretregs)
{
    code cs;

    //printf("cdind87(e = %p, *pretregs = x%x)\n",e,*pretregs);
    code *cload = getlvalue(&cs,e,0);   // get addressing mode
    if (!*pretregs)
        return cload;

    // Select the load opcode for the operand type
    switch (tybasic(e->Ety))
    {
        case TYfloat:
        case TYifloat:
            cs.Iop = 0xD9;
            break;

        case TYidouble:
        case TYdouble:
        case TYdouble_alias:
            cs.Iop = 0xDD;
            break;

        case TYildouble:
        case TYldouble:
            cs.Iop = 0xDB;
            cs.Irm |= modregrm(0,5,0);
            break;

        default:
            assert(0);
    }

    cload = cat(cload,push87());
    cload = gen(cload,&cs);             // FLD EA
    return cat(cload,fixresult87(e,mST0,pretregs));
}
/************************************
 * Reset statics for another .obj file:
 * clears every field of the file-level oldd record.
 */
void cg87_reset()
{
    memset(&oldd, 0, sizeof oldd);
}
/*****************************************
 * Append an FLDCW that loads the x87 control word cw.
 * For PIC code the control word is first written to the floatreg slot
 * and loaded from there. Otherwise it is loaded from a read-only symbol;
 * the two symbols (round-to-0 and round-to-nearest) are created on first
 * use and remembered in oldd.
 * Params:
 *      c = code to append to
 *      cw = control word value; anything other than CW_roundto0 selects
 *           the round-to-nearest symbol in the non-PIC case
 * Returns:
 *      c with the new instruction(s) appended
 */
STATIC code *genrnd(code *c, short cw)
{
    if (config.flags3 & CFG3pic)
    {
        code *cpic = genfltreg(NULL, 0xC7, 0, 0);       // MOV floatreg, cw
        cpic->IFL2 = FLconst;
        cpic->IEV2.Vuns = cw;
        cpic = genfltreg(cpic, 0xD9, 5, 0);             // FLDCW floatreg
        return cat(c, cpic);
    }

    if (!oldd.round)                    // if not initialized
    {
        short towardzero = CW_roundto0;         // round to 0
        short tonearest  = CW_roundtonearest;   // round to nearest

        oldd.round = 1;
        oldd.roundto0 = out_readonly_sym(TYshort,&towardzero,2);
        oldd.roundtonearest = out_readonly_sym(TYshort,&tonearest,2);
    }

    symbol *cwsym;
    if (cw == CW_roundto0)
        cwsym = oldd.roundto0;
    else
        cwsym = oldd.roundtonearest;

    // FLDCW cwsym
    code cs;
    cs.Iop = 0xD9;
    cs.Irm = modregrm(0,5,BPRM);
    cs.Iflags = CFoff;
    cs.Irex = 0;
    cs.IFL1 = cwsym->Sfl;
    cs.IEVsym1 = cwsym;
    cs.IEVoffset1 = 0;
    return gen(c,&cs);
}
/************************* Complex Numbers *********************/
/***************************
 * Set the PSW based on the state of ST01, by comparing both stack
 * entries against zero.
 * Input:
 *      c       code to append to
 *      e       not used by this function
 *      pop     1 if stack should be popped after test, 0 if not
 * Returns:
 *      start of code appended to c.
 */
STATIC code * genctst(code *c,elem *e,int pop)
#if __DMC__
__in
{
    assert(pop == 0 || pop == 1);
}
__body
#endif
{
    // Generate:
    //  if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    code *clabel = gennop(CNIL);        // L1, appended at the very end

    c = cat(c,push87());
    c = gen2(c,0xD9,0xEE);              // FLDZ

    if (pop)
    {
        gen2(c,0xDA,0xE9);              // FUCOMPP
        pop87();
        pop87();
        cg87_87topsw(c);                // put 8087 flags in CPU flags
        gen2(c,0xD9,0xEE);              // FLDZ
        gen2(c,0xDA,0xE9);              // FUCOMPP
    }
    else
    {
        gen2(c,0xDD,0xE1);              // FUCOM
        cg87_87topsw(c);                // put 8087 flags in CPU flags
        gen2(c,0xDD,0xEA);              // FUCOMP ST(2)
    }

    // Tail shared by both variants
    pop87();
    genjmp(c,JNE,FLcode,(block *) clabel);      // JNE L1
    genjmp(c,JP, FLcode,(block *) clabel);      // JP  L1
    cg87_87topsw(c);                    // put 8087 flags in CPU flags

    return cat(c, clabel);
}
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 * Handles complex values, moving them between ST01, the AX/DX pair and
 * the XMM0/XMM1 pair through the floatreg scratch slot as needed.
 * Params:
 *      e = the expression whose result is being placed
 *      retregs = where the result currently is
 *      pretregs = where the caller wants it; 0 means discard
 * Returns:
 *      the generated code
 */
code *fixresult_complex87(elem *e,regm_t retregs,regm_t *pretregs)
{
tym_t tym;
code *c1,*c2;
unsigned sz;
#if 0
printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
e,regm_str(retregs),regm_str(*pretregs));
#endif
assert(!*pretregs || retregs);
c1 = CNIL;
c2 = CNIL;
tym = tybasic(e->Ety);
sz = tysize[tym];
if (*pretregs == 0 && retregs == mST01)
{
// Result not wanted: drop both entries from the x87 stack
c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP
pop87();
c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP
pop87();
}
else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
{
// complex float: ST01 -> DX (from ST0) and AX (from ST1)
if (*pretregs & mPSW && !(retregs & mPSW))
c1 = genctst(c1,e,0); // FTST
pop87();
c1 = genfltreg(c1, ESC(MFfloat,1),3,0); // FSTP floatreg
genfwait(c1);
c2 = getregs(mDX|mAX);
c2 = genfltreg(c2, 0x8B, DX, 0); // MOV EDX,floatreg
pop87();
c2 = genfltreg(c2, ESC(MFfloat,1),3,0); // FSTP floatreg
genfwait(c2);
c2 = genfltreg(c2, 0x8B, AX, 0); // MOV EAX,floatreg
}
else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
{
// complex float: AX/DX -> ST01 (AX is loaded first, then DX)
c1 = push87();
c1 = genfltreg(c1, 0x89, AX, 0); // MOV floatreg, EAX
genfltreg(c1, 0xD9, 0, 0); // FLD float ptr floatreg
c2 = push87();
c2 = genfltreg(c2, 0x89, DX, 0); // MOV floatreg, EDX
genfltreg(c2, 0xD9, 0, 0); // FLD float ptr floatreg
if (*pretregs & mPSW)
c2 = genctst(c2,e,0); // FTST
}
else if ((tym == TYcfloat || tym == TYcdouble) &&
*pretregs & (mXMM0|mXMM1) && retregs & mST01)
{
// ST01 -> XMM1 (from ST0) and XMM0 (from ST1), via a double in floatreg
if (*pretregs & mPSW && !(retregs & mPSW))
c1 = genctst(c1,e,0); // FTST
pop87();
c1 = genfltreg(c1, ESC(MFdouble,1),3,0); // FSTP floatreg
genfwait(c1);
c2 = getregs(mXMM0|mXMM1);
c2 = genfltreg(c2, 0xF20F10, XMM1 - XMM0, 0); // MOVD XMM1,floatreg
pop87();
c2 = genfltreg(c2, ESC(MFdouble,1),3,0); // FSTP floatreg
genfwait(c2);
c2 = genfltreg(c2, 0xF20F10, XMM0 - XMM0, 0); // MOVD XMM0,floatreg
}
else if ((tym == TYcfloat || tym == TYcdouble) &&
retregs & (mXMM0|mXMM1) && *pretregs & mST01)
{
// XMM0/XMM1 -> ST01 (XMM0 is loaded first, then XMM1)
c1 = push87();
c1 = genfltreg(c1, 0xF20F11, XMM0-XMM0, 0); // MOVD floatreg, XMM0
genfltreg(c1, 0xDD, 0, 0); // FLD double ptr floatreg
c2 = push87();
c2 = genfltreg(c2, 0xF20F11, XMM1-XMM0, 0); // MOV floatreg, XMM1
genfltreg(c2, 0xDD, 0, 0); // FLD double ptr floatreg
if (*pretregs & mPSW)
c2 = genctst(c2,e,0); // FTST
}
else
{ // Result stays where it is; only flags may still need to be set.
// The stack is popped by the test when ST01 itself is not wanted.
if (*pretregs & mPSW)
{ if (!(retregs & mPSW))
{ assert(retregs & mST01);
c1 = genctst(c1,e,!(*pretregs & mST01)); // FTST
}
}
assert(!(*pretregs & mST01) || (retregs & mST01));
}
if (*pretregs & mST01)
{ // Record which parts of e now occupy the two stack entries
note87(e,0,1);
note87(e,sz/2,0);
}
return cat(c1,c2);
}
/*****************************************
 * Operators OPc_r and OPc_i: reduce a complex value in ST01 to a single
 * value in ST0 by discarding one of the two stack entries.
 * Params:
 *      e = the OPc_r or OPc_i node
 *      pretregs = registers in which the caller wants the result
 * Returns:
 *      the generated code
 */
code *cdconvt87(elem *e, regm_t *pretregs)
{
    regm_t retregs = mST01;
    code *c = codelem(e->E1, &retregs, FALSE);

    if (e->Eoper == OPc_r || e->Eoper == OPc_i)
    {
        // OPc_r: FSTP ST(0), i.e. pop ST0 and keep the entry below it
        // OPc_i: FSTP ST(1), i.e. overwrite the entry below with ST0, then pop
        unsigned sti = (e->Eoper == OPc_i) ? 1 : 0;

        c = genf2(c,0xDD,0xD8 + sti);
        pop87();
    }
    else
    {
        assert(0);
    }

    retregs = mST0;
    return cat(c, fixresult87(e, retregs, pretregs));
}
/**************************************
 * Load complex operand into ST01 or flags or both.
 * Handles variables and indirections (two loads from memory), constants
 * (two constant-load instructions) and size conversions (which simply
 * load the operand of the conversion).
 * Params:
 *      e = the complex expression to load
 *      pretregs = where the caller wants the result; the DMC contract
 *                 below requires a non-empty subset of mST01 | mPSW
 * Returns:
 *      the generated code
 */
code *cload87(elem *e, regm_t *pretregs)
#if __DMC__
__in
{
assert(I32 && config.inline8087);
elem_debug(e);
assert(*pretregs & (mST01 | mPSW));
assert(!(*pretregs & ~(mST01 | mPSW)));
}
__out (result)
{
}
__body
#endif
{
tym_t ty = tybasic(e->Ety);
code *c = NULL;
code *cpush = NULL;
code cs;
unsigned mf;
unsigned sz;
unsigned char ldop;
regm_t retregs;
int i;
//printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
// sz is the size of one component
sz = tysize[ty] / 2;
memset(&cs, 0, sizeof(cs));
if (ADDFWAIT())
cs.Iflags = CFwait;
// mf selects the memory format of the load; it is left unset for
// TYcldouble, which uses a fixed opcode below
switch (ty)
{
case TYcfloat: mf = MFfloat; break;
case TYcdouble: mf = MFdouble; break;
case TYcldouble: break;
default: assert(0);
}
switch (e->Eoper)
{
case OPvar:
notreg(e); // never enregister this variable
// falls through: variables are loaded the same way as indirections
case OPind:
cpush = cat(push87(), push87());
// Load the component at offset 0, then the one at offset sz
switch (ty)
{
case TYcfloat:
case TYcdouble:
c = loadea(e,&cs,ESC(mf,1),0,0,0,0); // FLD var
cs.IEVoffset1 += sz;
c = gen(c, &cs);
break;
case TYcldouble:
c = loadea(e,&cs,0xDB,5,0,0,0); // FLD var
cs.IEVoffset1 += sz;
c = gen(c, &cs);
break;
default:
assert(0);
}
retregs = mST01;
break;
case OPd_ld:
case OPld_d:
case OPf_d:
case OPd_f:
// Size conversions: load the operand of the conversion instead
c = cload87(e->E1, pretregs);
freenode(e->E1);
return c;
case OPconst:
cpush = cat(push87(), push87());
// One constant-load instruction per component; constants for which
// loadconst() returns 0 are not supported here
for (i = 0; i < 2; i++)
{
ldop = loadconst(e, i);
if (ldop)
{
c = genf2(c,0xD9,ldop); // FLDx
}
else
{
assert(0);
}
}
retregs = mST01;
break;
default:
#ifdef DEBUG
elem_print(e);
#endif
assert(0);
}
return cat4(cpush,c,fixresult_complex87(e, retregs, pretregs), NULL);
}
#endif // !SPP