Files
ldc/backend/cod3.c
Alexey Prokhin f2237662df Squashed 'dmd2/' changes from 10017d5..3443f38
3443f38 Fix issue 7493  Initialization of void[][N]
0b371da foreach can run semantic again
7216e2a fix Issue 7735 - Functions with variadic void[][]... arguments corrupt passed data
4fb2b2a Merge pull request #850 from 9rnsr/fix7773
9c59931 Merge pull request #851 from donc/ctfe7785pointerToVar
407f7e4 Merge pull request #852 from donc/segfault7639
9370f83 Fix issue 7380 Crash trying to use address of variable in struct constructor at module level
240866b Fix issue 7639 Undefined enum AA key crashes compiler
19b7096 Fix issue 7785 [CTFE] ICE when slicing pointer to variable
d9b11f6 fix Issue 7773 - UCFS syntax on built-in attributes too?
296d812 Merge pull request #846 from donc/ctfe7781segfault
65aca2d Merge pull request #848 from donc/regression7751
5576737 Merge pull request #849 from donc/bug7794
0310838 Merge pull request #828 from 9rnsr/fix7751
4027e4f Fix issue 7794 Sea of errors when calling regex() after compile error
59cc12d Fix issue 7781 [CTFE] Segmentation fault on 'mixin({return;}());'
3430947 fix seg fault in fail91.d
948274e Merge pull request #824 from donc/regression7745
22ac4b1 Merge pull request #826 from 9rnsr/fix6659
1c15841 Merge pull request #823 from redstar/mscclean
5f54752 Merge pull request #827 from 9rnsr/fix7694
399e4a3 Merge pull request #844 from donc/regression7782
516f49b Fix issue 7789 [CTFE] null pointer exception on setting array length
d74b354 Fix issue 7782 Regression: ICE with wrong import syntax
0269194 Fix issue 7751 [ICE] (Regression 2.059head) From auto and forward reference
42ad236 Merge pull request #830 from 9rnsr/fix_ufcs
67bf025 Merge pull request #832 from 9rnsr/fix7608
d13f107 Merge pull request #829 from 9rnsr/fix7754
e25cbe2 Merge pull request #834 from 9rnsr/fix2367
7fac235 merge D2 pull #842
c836773 Merge pull request #836 from 9rnsr/fix7757
a2754c5 Merge pull request #839 from 9rnsr/fix7768
4948836 fix Issue 7694 - Internal error: e2ir.c 1251 when calling member function inside struct via alias param
9f23335 Merge pull request #838 from 9rnsr/fix7621
92eba60 Merge pull request #840 from 9rnsr/fix7769
8fae3c2 fix issue 7742 - 'More initializers than fields' error with correct number of fields
6c2d706 to enum
35e4f08 fix Issue 7769 - relax inout rule doesn't work for template function
96a0105 fix Issue 7768 - More readable template error messages
8012d58 Merge pull request #831 from 9rnsr/fix7743
9c0cbdd fix Issue 7621 - Immutable type equivalence problem
f67f313 Merge pull request #833 from 9rnsr/fix7731
29754dd Merge pull request #837 from braddr/cleanup-backend2
374109a restore original binary() function and re-fix the new version
78c04aa fix Issue 7757 - Inout function with lazy inout parameter doesn't compile
50c34e9 fix Issue 7754 - static this() in template is stripped during header gen
11acdff Fix auto tester breaking.
f0b7157 fix Issue 7755 - regression(2.059head): ICE in glue.c
cfceb77 fix Issue 7751 - [ICE] From auto and forward reference
7a86807 fix Issue 2367 - Overloading error with string literals
6039c40 fix Issue 7731 - Assertion failure: 't' on line 7911 in file 'mtype.c'
aea3a39 fix Issue 7608 - __traits(allMembers) is broken
f46f07a fix Issue 7743 - Parsing problem with nothrow delegate
fa9d29f Revert "Revert "Refactor for UFCS property getter/setter resolution.""
d9698d8 Revert "Revert "fix Issue 7722 - Refuse normal functions to be used as properties""
0fbc772 Revert "Revert "Allow property function has two arguments""
07a3b09 fix Issue 6659 - Destructor in range foreach called after initialization
e499d4d Fix issue 7745 Regression(2.059beta) Methods defined in external object files when a pointer to it is taken
79a74e1 Fixes an unknown pragma warning.
2b12052 Fix issue 176 [module] message "module and package have the same name"
90e89a4 Merge pull request #814 from 9rnsr/fix7713
3ab0e79 Merge pull request #818 from donc/assoc7732
b3360e9 Fix issue 7732 [CTFE] wrong code for a struct called AssociativeArray
05f0b08 Merge pull request #779 from 9rnsr/fix7534
867e567 Revert "Allow property function has two arguments"
9171aeb Revert "fix Issue 7722 - Refuse normal functions to be used as properties"
989ced7 Revert "Refactor for UFCS property getter/setter resolution."
e9b5292 Refactor for UFCS property getter/setter resolution.
761d000 fix Issue 7722 - Refuse normal functions to be used as properties
9f5956b Allow property function has two arguments
1a11862 Revert "Allow property function has two arguments"
32f57e5 Revert "fix Issue 7722 - Refuse normal functions to be used as properties"
6489bb4 Revert "Refactor for UFCS property getter/setter resolution."
214296f Merge pull request #817 from 9rnsr/fix_ufcs
c3c7f2a Merge pull request #816 from donc/voidctfe6438
185d031 Refactor for UFCS property getter/setter resolution.
08bf89d fix Issue 7722 - Refuse normal functions to be used as properties
f0e3433 Allow property function has two arguments
1b67ac9 Direct check by Type::reliesOnTident
a3cd7d9 fix Issue 7713 - lambda inference doesn't work on template function argument
1762112 Fix issue 6438 - [CTFE] wrong error "value used before set" when slicing =void array
ace1eca fix complex constant folding
76f9b22 Consider return type covariance.
f700dbc fix Issue 7534 - Allow attribute-overloading of an overridden method
cba8f5c Merge pull request #763 from 9rnsr/fix7578
392d93f Merge pull request #815 from dawgfoto/fixSegFault
e48aba2 merge part of pull #769
d72a17e revert dd5a543
24d860b error(Loc loc,) doesn't abort program
4c79117 Use correct opcodes for moving cfloat from st->xmm and xmm->st
af875ff Merge pull request #785 from braddr/cleanup-backend2
9d3021a remove debugging printfs
b3df5ee Merge pull request #807 from dawgfoto/fix7698
f005537 Merge pull request #802 from dawgfoto/fixVC
65a145d Merge pull request #803 from donc/ctfeunion6681yebblies
1cf39ca Merge pull request #812 from 9rnsr/fix_ufcs
d846c3c Merge pull request #808 from 9rnsr/fix7702
fd0a492 fix Issue 7670 - UFCS problem with @property and structs
1ad35b2 Fix for UFCS with property syntax, and add exhaustive test
96f15a1 Resolve broken build after merging
4712aab fix regression
4e05482 Merge pull request #805 from donc/regression7681
245a107 dt_ functions aren't x86 specific
b35f43a another missing loc in an error() call
001addb minor cleanups
2fb1e46 make util_assert take a const string
907da39 cleanup whitespace in binary(), add binary() that takes the length of the string to search for
59d0425 Merge pull request #804 from braddr/nearsighted
d725eed Merge pull request #806 from donc/ctfe7633equalmsg
12a5c26 Merge pull request #811 from donc/bug7699
4279d5e revert the revert
c895c3b revert pull #809
865fb20 fix Issue 5733 - Calling opDispatch As Template Results in Compiler Infinite Loop
96e16d3 fix Issue 7702 - opDispatch goes into infinite loop
5e343c0 Remove special case for DotIdExp and opDispatch semantic, it isn't need anymore
1a9d607 Fix issue 7699 - Cannot get frame pointer to in contract when compiling with -inline
d1476eb Merge pull request #809 from 9rnsr/fix_funclit
afc7c60 allow out-of-order semantic analysis of fields
17da3a0 fix Issue 7705 - lambda syntax doesn't allow some valid signatures
e29d06d fix issue 7698
911d053 Fix issue 7633 - Missing CTFE error message
3802dde Fix issue 7681 Regression(2.059head):ICE:opCatAssign(delegate) to undefined identifier
8da4121 near-ectomy
cd6dc83 fix Library::error()s format string to take a const char*
f3f03c6 switch to apply()
faf873a fix Issue 3510 - Cannot forward reference a templated type from within a template mixin
23aa2be fix Issue 3509 - Cannot forward reference a template mixin's members in a compile-time context
e81309b Add missing 'loc' to error message.
b6898e3 Fix issue 6681 - struct constructor call is converted to struct literal that breaks union initialization
b79afba long double => longdouble
e48c319 Merge pull request #742 from yebblies/issue5879
d74485a Merge pull request #787 from eco/ddoc-srcfilename
3038cb9 Merge pull request #795 from dawgfoto/fixComment
89a039a Merge pull request #801 from dawgfoto/fix4507
c17c2d8 fix issue 4507
dd86c72 Merge pull request #796 from dawgfoto/fixVC
a516588 Merge pull request #797 from 9rnsr/fix7682
1b9839a Merge pull request #799 from 9rnsr/fix6982
4596774 Merge pull request #800 from 9rnsr/fix_type_deduction
b68d546 forgot about @system
bfe1083 add attributes to toHash
8f819d6 Stop special case in mutableOf/makeMutable with inout type.
319b1a3 Fix the lacks of type merging in Type::mutableOf() and uhSharedOf()
cfe7450 fix Issue 7671 - Broken inout deduction of shared(inout(T[n])) from immutable(int[3])
aca5c37 Stop too eager call of TypeAArray::getImpl() When implicitConvTo(non aa Tstruct => Taarray)
50b2a97 fix Issue 6982 - immutability isn't respected on associative array assignment
a5daa5e fix Issue 7684 - IFTI and shared overload doesn't work
e43fbac fix Issue 7682 - shared array type and "cast() is not an lvalue" error
8191801 cpp_prettyident only needed for C++
4487f75 fix ldval
525647c tparam is the specialization
f893925 fix issue 7592 d847c1c2dd
108b25d Merge pull request #780 from 9rnsr/fix7641
105a51f Merge pull request #784 from 9rnsr/fix7110
8b5b67f Merge pull request #792 from donc/bug7667
f72f237 fix Issue 3682 - Regression(2.038) is expression fails to match types
436b711 Fix issue 7667. ICE(interpret.c): 'ctfeStack.stackPointer() == 0'
9005276 Merge pull request #679 from yebblies/issue783
350a3ce Merge pull request #582 from 9rnsr/fix3382_ufcs
5f020c3 Merge pull request #788 from braddr/cleanup-backend3
6aa91cf Merge pull request #790 from p0nce/master
351d595 remove tls bracketing
a137d72 Fix bug #6391
6ce219c remove some of the bracketing
aec4c13 fix Issue 7578 - ICE on indexing result of vararg opDispatch
95e3dc1 Fix unintended infinite loop in Phobos build
b66196a fix Issue 3382 - [tdpl] Implement uniform function call syntax
ee2fe6c Fix 977 is with counting end-of-lines towards most advanced lexer peeking
7790b16 fix Issue 7650 - Bad lambda inference in associative array literal
c03484e fix Issue 7649 - Bad lambda inference in default function argument
f293a10 fix Issue 7499 - [ICE] ('cast.c line 1495) with lambda array
9f0622c Expression::inferType() and remove FuncExp::setType()
cfc67b7 refactor lambda inference process
6d49586 more de-TX86'ing in relation to a bunch of OP codes
2efbf6a TX86-ectomy in evalu8.c
953f6d7 rip TX86 conditionals out of el.c
d5663c7 fix Issue 7595 - Data being overwritten.
449c165 Add predefined Ddoc macro SRCFILENAME
5c5da66 fix uninitialized field
29cde54 Merge pull request #783 from 9rnsr/fix7038
06d65ab fix Issue 7038 - Type mismatch with const struct
b77e2c9 fix Issue 7110 - opSlice() & opIndex functions works unstable as template arguments
a65f02f Merge pull request #781 from braddr/fix
08d6cd5 Merge pull request #782 from braddr/fixiasm
2492332 fix latent bug with Lexer::peek and recently introduced bug in Lexer::scan
ec1888e initialize popndTmp rather than rely on carefulness: when usNumops == 0 and emitting a vector instruction, popndTmp is left uninitialized and is later dereferenced.
1d4a742 Merge pull request #766 from 9rnsr/fix7563
e1cd535 refactor
90f8dcf fix Issue 7641 - std.typecons.Proxy incorrectly allows implicit conversion to class
83a93cf Merge pull request #778 from dawgfoto/MoreSpellCorrection
7f0bcde 2nd go at fix issue 5590
567d7df fix Issue 5590 - Regression(2.036) ICE(e2ir.c): when using .values on enum which is associative array
48ea951 fix Issue 4820 - Regression(1.058, 2.044) in DStress caused by changeset 452
e8f9f3b more spell correction
afd9a45 fix Issue 7618 - delegate/function pointer call bypass parameter storage class
dabcdfb Merge pull request #773 from 9rnsr/fix7583
9846bb2 Merge pull request #774 from donc/ctfe7568
8c20445 Merge pull request #775 from donc/_error6785
d41e58e Avoiding shallow copy is more better.
cccef09 Revert "fix Issue 7585 - functions in templates inferred as delegate"
fc8dfc0 6785 Wrong error message from pragma(msg) of failed instantiation
61ec04d 7568 pragma(msg) segfaults with an aggregate including a class.
4d86d39 Merge pull request #767 from 9rnsr/fix7585
207d351 fix Issue 7583 - [CTFE] ICE with tuple and alias this
53bafa2 fix Issue 7411 - Deduce base type from vector types in templates
5ab1bd9 fix Issue 7518 - std.array.empty doesn't work for shared arrays
a1030d3 fix Issue 7554 - Immutable function pointer arguments too
5e96900 Merge pull request #771 from donc/bug7589
2287ebc fix Issue 7547 - -deps output lists object as a top level module
e611781 7589 __traits(compiles) does not work with a template that fails to compile
0113cde fix Issue 7585 - functions in templates inferred as delegate
4b978d5 fix Issue 7563 - Class members with default template arguments have no type
4d68981 fix Issue 7500 - [ICE] (template.c line 5287) with immutable lambda function
1a39c3c missed a line
6dd89ca Merge pull request #765 from 9rnsr/fix7525
8d6dcac fix Issue 7502 - 2.056 regression: Assigning .init takes forever to compile for large structs
042096e fix Issue 7525 - Broken return type inference for delegate returns
c5affa5 fix Issue 7582 - Untyped nested delegate literals don't compile
121677c fix Issue 7580 - Identity assignment of Nullable crashes dmd
adc0502 Small refactoring to resolve alias this.
1f52383 Merge pull request #671 from yebblies/issue4958
2a12345 fix build breakage
8755819 fix build
ba86204 fix vcbuild
464c664 fix linux build
31197c8 tweaked command line moved some inline asm to C-function to not interfere with optimizations build with VS2011
4dcdc9c increase stack size for win64 build
77262aa add missing include to root
56afe3f batch to build through win32.mak
5a0fd30 build through win32.mak
a5b5190 long_double -> longdouble remove C99 printf add Win64 support
9640110 vcbuild
b619171 Merge pull request #761 from donc/ctfe7473structref
7756328 Merge pull request #725 from kennytm/bug7399-import-too-fatal
bbac9e4 Merge pull request #759 from yebblies/issue1149
d1ff23b 7473 [CTFE] Non-ref argument behaves as if it's a ref argument
ab5cb18 Fix OPmsw codegen - integer only is too restrictive.
a00833b Merge pull request #743 from yebblies/issue3354
b006e11 Merge pull request #757 from 9rnsr/fix7562
3bccbb0 fix Issue 7562 - DMD crashes by using TemplateThisParameter
a7dc50e Merge pull request #749 from yebblies/issue1149
a873c5f Merge pull request #758 from 9rnsr/fix5525
5d639ec fix Issue 5525 - Eponymous templates should allow for overloaded eponymous members
f50852c Merge pull request #729 from donc/gag4269
de02523 fix Issue 3927 - array.length++; is an error, but ++array.length compiles
1dc5bfd Merge pull request #680 from yebblies/issue3812
cf887ba move errors to Dsymbol
fc4acf5 Merge pull request #755 from donc/seaOfErrors7557
be2f3a9 7557b soldier on through dottemplate expressions
8cec825 7557 Sea of errors after template failure
37ec6d6 A small fixup to call Type::defaultInitLiteral
7b5e2cb Revert "Revert "Merge pull request #41 from 9rnsr/rvalue-struct-literal""
3d8f09a Merge branch 'master' of github.com:D-Programming-Language/dmd
7dfb4cc Merge pull request #752 from braddr/cleanup-backend2
1b28f51 Merge branch 'master' of github.com:D-Programming-Language/dmd
31ad73c Merge pull request #746 from yebblies/issue5554
25f770d Change lexer to support # as a token, preserving #line's original behavior
dd8d20a Revert "Merge pull request #41 from 9rnsr/rvalue-struct-literal"
ee2fdf9 Merge pull request #41 from 9rnsr/rvalue-struct-literal
f94fdbf Merge pull request #750 from yebblies/issue3630
61f5fcf Improve codegen for OPmsw
05a3fa4 Merge pull request #744 from Safety0ff/avx-fix
0231d6a Merge pull request #748 from 9rnsr/fix7552
9a97979 Merge pull request #751 from donc/ctfe7536
e091e6e 7536 ctfeAdrOnStack triggered
c9edaf4 fix Issue 7552 - Cannot get and combine a part of overloaded functions
1edeba9 Fix Issue 3630 - bad error location in "has no effect in expression" error
7d0fb72 Fix Issue 5554 - [qtd] Covariance detection failure
4f36aca fix Issue 7550 - Missing AVX instruction VPMULDQ
0b82dfe Fix Issue 5879 - Not all frontend errors use stderr
963a41a Merge pull request #695 from yebblies/refactor_expression
3f06690 Fix Issue 3354 - asm fld x, ST(6); accepted
713f69f Merge pull request #677 from yebblies/issue4241
cf22ce3 Merge pull request #711 from yebblies/issue3559
56ca73c Merge pull request #700 from kennytm/bug7452_lazy_safe
c4dc723 Merge pull request #736 from ibuclaw/in_gcc
121c9b9 Merge pull request #737 from yebblies/issue7544
cedcb3c Merge pull request #740 from yebblies/issue7545
fb3e8f2 Merge pull request #741 from dawgfoto/DMCWarning
5d26c1e Merge pull request #735 from 9rnsr/fix7105
734a921 dmc warning
1e1cfbc Fix Issue 7545 - ICE(cast.c) Merge integral types through alias this
6b135be Fix Issue 7544 - ICE(interpret.c) Catching an exception with a null catch block
c5336f9 Update already existing gdc-specific code, harmonise headers.
44b8d59 Merge pull request #703 from kennytm/bug435_template_ctor
6b368e1 Merge pull request #707 from yebblies/issue3822
8439e07 Merge pull request #717 from yebblies/issue6611
2b4502e fix Issue 7105 - relax inout rules
ac4463a wildsubparam isn't need anymore, because it works properly.
f77879a Issue 6611 - better error message for array post increment/decrement
7393395 Merge pull request #716 from yebblies/issue6685
77568f0 Merge pull request #719 from yebblies/issue4536
9accb04 tired of tdata()
5fbd5a2 Merge pull request #732 from dawgfoto/fix5412
41a901a Revert "hide private/package module level symbols"
23d5e14 Merge pull request #733 from dawgfoto/HideModuleMembers
e2f8a23 hide private/package module level symbols
ae75287 detect collisions with renamed imports
75a2442 fix Dsymbol::search_correct
50e122a Merge pull request #723 from kennytm/bug7504_null_array
c5b7601 Revert "fix 7494 - selective imports in function scope"
aa6f4d9 Revert "fix Protection"
5be660e Revert "fix Imports"
040371b Revert "detect collisions with renamed imports"
0159818 Revert "find private symbols during spell correction"
0c95c45 find private symbols during spell correction
ca22fb2 detect collisions with renamed imports
0dca0af fix Imports
37d4fda fix Protection
16a2e7e fix 7494 - selective imports in function scope
c16f5b2 Merge pull request #667 from 9rnsr/fix7406
f776617 explanatory comments belong in the code, not bugzilla
bfa2060 Merge pull request #704 from donc/_error6699
f46705c fix fail222 regression
28d9635 Merge pull request #708 from donc/soldieron7481
2c2a7af Merge pull request #715 from 9rnsr/fix6738
98cfa64 Merge pull request #722 from 9rnsr/fix7353
b040567 revert pull 724
0e84f63 revert part of pull 724
400f702 Merge pull request #724 from yebblies/issue3632
d82cc74 Merge pull request #720 from yebblies/issue3279
2da3bed Merge pull request #718 from yebblies/fixdebugmsg
f6627ec 7527 [CTFE] Segfault when slicing a pointer at compile time
c8f09bf 4269a Regression(2.031): invalid type accepted if evaluated while errors are gagged
d10fba0 implement const/purity/nothrow/@safe inheritance
ad689fb Fix bug 7399: Broken import statement in trySemantic() causes silent compiler error
eb0c643 Add global.speculativeGag
c18220a Refactor isSpeculativeFunction into Dsymbol
f5c56d8 Issue 3632 - modify float is float to do a bitwise compare
af1cab4 Issue 7353 - NRVO not properly working with inferred return type
03ee438 Fix bug 7504: Cannot assign an object of type 'typeof(null)' to an array
dfb941c Remove debug printing in code that generates errors.
62118e3 Issue 4536 - Typetuples (T...) should have an .init member
989da7b Issue 3279 - Confusing error message when comparing types
36e8045 Issue 6685 - Allow using "with" with rvalues
60cbc6f fix issue 6738 revisited
4e20e7d Issue 3822 - Invalid optimization of alloca called with constant size
b37bf8c Fixes bug 435: Constructors should be templatized
ad8157d Issue 3559 - DMD 1.048+ fails to take function pointer from overloaded member functions
838cd06 7481 Compiler should 'soldier on' after template errors
673063e Simplify fix for 6699
1a0b199 6699a __error when instantiating function template
b6d072d 6699b __error in alias expression
df16ffa 6699c __error in synchronized error message
338f804 7462 Error message with _error_ in overridden function
0f60bd3 7463 Duplicated error message with bad template value parameter
f43e93a 6699E: _error inside error msg for bad base class
5109a5a Fixes bug 7452.
04d888f Refactor XxxAssignExp semantic
73973d6 Issue 3812 - Missing line number for implicit cast of variadic function to array
f0bbf18 Issue 3927 - array.length++; is an error, but ++array.length compiles
24576c2 Issue 783 - Cannot use an array w/ const or variable index as new[] size argument.
7e4cd4b Issue 4241 - duplicate union initialization error doesn't give a file location
9987127 Issue 4958 - Floating point enums should check for total loss of precision
60287fd Issue 7406 - tuple foreach doesn't work with mixed tuples
633d88e Issue 5889 - Struct literal/construction should be rvalue
5d5f78a Now function overloading with ref and non-ref parameter is legal for struct type

git-subtree-dir: dmd2
git-subtree-split: 3443f38fc4c17807a0f36005a05d598cfc7301db
2012-04-05 11:45:25 +04:00

6459 lines
207 KiB
C
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (C) 1984-1998 by Symantec
// Copyright (C) 2000-2011 by Digital Mars
// All Rights Reserved
// http://www.digitalmars.com
// Written by Walter Bright
/*
* This source file is made available for personal use
* only. The license is in /dmd/src/dmd/backendlicense.txt
* or /dm/src/dmd/backendlicense.txt
* For any other uses, please contact Digital Mars.
*/
#if !SPP
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "cc.h"
#include "el.h"
#include "code.h"
#include "oper.h"
#include "global.h"
#include "type.h"
#include "tinfo.h"
#if SCPP
#include "exh.h"
#endif
#if HYDRATE
#include "parser.h"
#endif
static char __file__[] = __FILE__; /* for tassert.h */
#include "tassert.h"
extern targ_size_t retsize;
// Prototypes for file-local helpers; their definitions are not part of
// this excerpt (presumably further down in this file).
STATIC void pinholeopt_unittest();
STATIC void do8bit (enum FL,union evc *);
STATIC void do16bit (enum FL,union evc *,int);
// The trailing targ_size_t argument is optional and defaults to 0.
STATIC void do32bit (enum FL,union evc *,int,targ_size_t = 0);
STATIC void do64bit (enum FL,union evc *,int);
// Per-function state. Everything here is private to this file except
// BPoff, which has external linkage.
static int hasframe; /* !=0 if this function has a stack frame */
static targ_size_t Foff; // BP offset of floating register
static targ_size_t CSoff; // offset of common sub expressions
static targ_size_t NDPoff; // offset of saved 8087 registers
int BPoff; // offset from BP
static int EBPtoESP; // add to EBP offset to get ESP offset
static int AAoff; // offset of alloca temporary
// JMPSEG/JMPOFF name a segment and its current offset. ELF and Mach-O
// object files use CDATA/CDoffset; every other object format uses
// DATA/Doffset.
// NOTE(review): presumably this is where switch jump tables are emitted;
// the users of these macros are outside this excerpt - confirm.
#if ELFOBJ || MACHOBJ
#define JMPSEG CDATA
#define JMPOFF CDoffset
#else
#define JMPSEG DATA
#define JMPOFF Doffset
#endif
/*************
* Size in bytes of each instruction.
* The table is indexed by the one-byte opcode; the trailing comment on
* each row gives the opcode of the first entry in that row.
* 0 means illegal instruction.
* bit M: if there is a modregrm field (EV1 is reserved for modregrm)
* bit T: if there is a second operand (EV2)
* bit E: if second operand is only 8 bits
* bit A: a short version exists for the AX reg
* bit R: a short version exists for regs
* bits 2..0: size of instruction (excluding optional bytes)
*
* W is defined as 0, so OR-ing it into an entry sets no bit. It is only
* used for the 0x80..0x8F entries of inssize2[] (in the initializer and
* in cod3_set32()/cod3_set64()).
* NOTE(review): presumably W tags operands whose size depends on the
* word size - confirm.
*
* inssize[] is not const because cod3_set32() and cod3_set64() overwrite
* entries 0xA0..0xA3 with T|5 at start-up.
*/
#define M 0x80
#define T 0x40
#define E 0x20
#define A 0x10
#define R 0x08
#define W 0
static unsigned char inssize[256] =
{ M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 00 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 08 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 10 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 18 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 20 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 28 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 30 */
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 38 */
1,1,1,1, 1,1,1,1, /* 40 */
1,1,1,1, 1,1,1,1, /* 48 */
1,1,1,1, 1,1,1,1, /* 50 */
1,1,1,1, 1,1,1,1, /* 58 */
1,1,M|2,M|2, 1,1,1,1, /* 60 */
T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1, /* 68 */
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */
M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */
M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2, /* 88 */
1,1,1,1, 1,1,1,1, /* 90 */
1,1,T|5,1, 1,1,1,1, /* 98 */
#if 0 /* cod3_set32() patches this */
T|5,T|5,T|5,T|5, 1,1,1,1, /* A0 */
#else
T|3,T|3,T|3,T|3, 1,1,1,1, /* A0 */
#endif
T|E|2,T|3,1,1, 1,1,1,1, /* A8 */
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */
T|3,T|3,T|3,T|3, T|3,T|3,T|3,T|3, /* B8 */
M|T|E|3,M|T|E|3,T|3,1, M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */
T|E|4,1,T|3,1, 1,T|E|2,1,1, /* C8 */
M|2,M|2,M|2,M|2, T|E|2,T|E|2,0,1, /* D0 */
/* For the floating instructions, allow room for the FWAIT */
M|2,M|2,M|2,M|2, M|2,M|2,M|2,M|2, /* D8 */
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */
T|3,T|3,T|5,T|E|2, 1,1,1,1, /* E8 */
1,0,1,1, 1,1,M|A|2,M|A|2, /* F0 */
1,1,1,1, 1,1,M|2,M|R|2 /* F8 */
};
/*************
* Instruction sizes indexed by the one-byte opcode; the trailing comment
* on each row gives the opcode of the first entry in that row.
* Unlike inssize[], the entries are plain byte counts with no flag bits
* OR-ed in. Entries 0xD6 and 0xF1 are 0.
* NOTE(review): presumably this is the 32-bit operand/address size
* counterpart of inssize[], with 0 again meaning "illegal"; the code
* that reads this table is outside this excerpt - confirm.
*/
static const unsigned char inssize32[256] =
{ 2,2,2,2, 2,5,1,1, /* 00 */
2,2,2,2, 2,5,1,1, /* 08 */
2,2,2,2, 2,5,1,1, /* 10 */
2,2,2,2, 2,5,1,1, /* 18 */
2,2,2,2, 2,5,1,1, /* 20 */
2,2,2,2, 2,5,1,1, /* 28 */
2,2,2,2, 2,5,1,1, /* 30 */
2,2,2,2, 2,5,1,1, /* 38 */
1,1,1,1, 1,1,1,1, /* 40 */
1,1,1,1, 1,1,1,1, /* 48 */
1,1,1,1, 1,1,1,1, /* 50 */
1,1,1,1, 1,1,1,1, /* 58 */
1,1,2,2, 1,1,1,1, /* 60 */
5,6,2,3, 1,1,1,1, /* 68 */
2,2,2,2, 2,2,2,2, /* 70 */
2,2,2,2, 2,2,2,2, /* 78 */
3,6,3,3, 2,2,2,2, /* 80 */
2,2,2,2, 2,2,2,2, /* 88 */
1,1,1,1, 1,1,1,1, /* 90 */
1,1,7,1, 1,1,1,1, /* 98 */
5,5,5,5, 1,1,1,1, /* A0 */
2,5,1,1, 1,1,1,1, /* A8 */
2,2,2,2, 2,2,2,2, /* B0 */
5,5,5,5, 5,5,5,5, /* B8 */
3,3,3,1, 2,2,3,6, /* C0 */
4,1,3,1, 1,2,1,1, /* C8 */
2,2,2,2, 2,2,0,1, /* D0 */
/* For the floating instructions, don't need room for the FWAIT */
2,2,2,2, 2,2,2,2, /* D8 */
2,2,2,2, 2,2,2,2, /* E0 */
5,5,7,2, 1,1,1,1, /* E8 */
1,0,1,1, 1,1,2,2, /* F0 */
1,1,1,1, 1,1,2,2 /* F8 */
};
/* For 2 byte opcodes starting with 0x0F.
 * Indexed by the second opcode byte; the trailing comment on each row
 * gives the index of the first entry in that row. Entries use the same
 * encoding as inssize[]: the M/T/E flag bits OR-ed with a size in the
 * low bits. W is 0 and therefore contributes no bit.
 * Not const: cod3_set32() and cod3_set64() rewrite entries 0x80..0x8F
 * to W|T|6.
 */
static unsigned char inssize2[256] =
{ M|3,M|3,M|3,M|3, 2,2,2,2, // 00
2,2,M|3,2, 2,2,2,M|T|E|4, // 08
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 10
M|3,2,2,2, 2,2,2,2, // 18
M|3,M|3,M|3,M|3, M|3,2,M|3,2, // 20
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 28
2,2,2,2, 2,2,2,2, // 30
M|4,2,M|T|E|5,2, 2,2,2,2, // 38
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 40
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 48
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 50
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 58
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 60
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 68
M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
2,2,2,2, M|3,M|3,M|3,M|3, // 78
W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 90
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 98
2,2,2,M|3, M|T|E|4,M|3,2,2, // A0
2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8
M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0
M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8
M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
2,2,2,2, 2,2,2,2, // C8
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0
M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8
};
/*************************************************
 * Reserve a slot in the register save area and emit the store that
 * spills `reg` into it.
 * Input:
 *      c       code list the store is appended to (via genc1)
 *      reg     register to save; values >= XMM0 are XMM registers
 *      pidx    receives the slot offset, to be handed back to restore()
 * Returns:
 *      the code list with the store appended
 */
code *REGSAVE::save(code *c, int reg, unsigned *pidx)
{
    unsigned slot;              // offset of the slot handed out for reg

    if (reg < XMM0)
    {
        // General purpose register: slots are REGSIZE bytes. An alignment
        // that has already been chosen is left alone.
        if (alignment == 0)
            alignment = REGSIZE;
        slot = idx;
        idx += REGSIZE;

        // MOV slot[RBP],reg
        c = genc1(c, 0x89, modregxrm(2, reg, BPRM), FLregsave, (targ_uns) slot);
        if (I64)
            code_orrex(c, REX_W);
    }
    else
    {
        // XMM register: force the area to 16 byte alignment, round the
        // next free offset up to a multiple of 16 and reserve 16 bytes.
        alignment = 16;
        idx = (idx + 15) & ~15;
        slot = idx;
        idx += 16;

        // MOVSD slot[RBP],xmm   (opcode F2 0F 11)
        c = genc1(c, 0xF20F11, modregxrm(2, reg - XMM0, BPRM), FLregsave, (targ_uns) slot);
    }

    reflocal = TRUE;

    // top tracks the largest extent the save area has reached so far
    if (top < idx)
        top = idx;

    *pidx = slot;
    return c;
}
/*************************************************
 * Emit the load that reloads `reg` from a slot previously handed out by
 * REGSAVE::save().
 * Input:
 *      c       code list the load is appended to (via genc1)
 *      reg     register to reload; values >= XMM0 are XMM registers
 *      idx     slot offset, as stored through save()'s pidx argument
 * Returns:
 *      the code list with the load appended
 */
code *REGSAVE::restore(code *c, int reg, unsigned idx)
{
    if (reg < XMM0)
    {
        // MOV reg,idx[RBP]
        c = genc1(c, 0x8B, modregxrm(2, reg, BPRM), FLregsave, (targ_uns) idx);
        if (I64)
            code_orrex(c, REX_W);
        return c;
    }

    // save() sets the alignment to 16 whenever it spills an XMM register
    assert(alignment == 16);

    // MOVSD xmm,idx[RBP]   (opcode F2 0F 10)
    return genc1(c, 0xF20F10, modregxrm(2, reg - XMM0, BPRM), FLregsave, (targ_uns) idx);
}
/************************************
 * Size for vex encoded instruction.
 * Input:
 *      c       instruction; must have CFvex set in Iflags
 * Returns:
 *      an inssize2[] entry with a small constant added to it, so it keeps
 *      the inssize2[] layout (flag bits OR-ed with a size in the low bits):
 *        - without CFvex3:               inssize2[op] + 1
 *        - CFvex3, mmmm 0 or 1 (0F map): inssize2[op] + 2
 *        - CFvex3, mmmm 2 (0F 38 map):   inssize2[0x38] + 1
 *        - CFvex3, mmmm 3 (0F 3A map):   inssize2[0x3A] + 1
 *      Any other mmmm value is a caller bug: it trips assert(0), and
 *      yields 0 when assertions are compiled out.
 */
unsigned char vex_inssize(code *c)
{
    assert(c->Iflags & CFvex);

    // Start from 0 so the result is well defined even if the default
    // branch below is reached in a build where assert() is a no-op.
    unsigned char ins = 0;

    if (c->Iflags & CFvex3)
    {
        switch (c->Ivex.mmmm)
        {
            case 0: // no prefix
            case 1: // 0F
                ins = inssize2[c->Ivex.op] + 2;
                break;
            case 2: // 0F 38
                ins = inssize2[0x38] + 1;
                break;
            case 3: // 0F 3A
                ins = inssize2[0x3A] + 1;
                break;
            default:
                assert(0);
                break;
        }
    }
    else
    {
        ins = inssize2[c->Ivex.op] + 1;
    }
    return ins;
}
/************************************
 * Tell whether instruction c carries a modregrm byte.
 * Returns:
 *      the M flag (non-zero) of the matching size-table entry when the
 *      opcode has a modregrm byte, 0 otherwise. ESCAPE pseudo-ops always
 *      yield 0.
 */
int cod3_EA(code *c)
{
    const unsigned lowbyte = c->Iop & 0xFF;

    // pseudo-op: no table lookup, never has a modregrm byte
    if (lowbyte == ESCAPE)
        return 0;

    // Three byte opcodes 0F 38 xx and 0F 3A xx (the mask ignores the bit
    // that distinguishes 38 from 3A): look up the middle byte.
    if ((c->Iop & 0xFFFD00) == 0x0F3800)
        return inssize2[(c->Iop >> 8) & 0xFF] & M;

    // Two byte opcodes 0F xx
    if ((c->Iop & 0xFF00) == 0x0F00)
        return inssize2[lowbyte] & M;

    // Everything else is looked up as a one byte opcode
    return inssize[lowbyte] & M;
}
/********************************
 * Set up the code generator globals and size tables for 386 (32 bit)
 * code generation.
 */
void cod3_set32()
{
    // Patch the size tables: opcodes A0..A3 become T|5, and the two byte
    // opcodes 0F 80..0F 8F become W|T|6.
    for (unsigned op = 0xA0; op <= 0xA3; op++)
        inssize[op] = T|5;
    for (unsigned op = 0x80; op < 0x90; op++)
        inssize2[op] = W|T|6;

    BPRM = 5;                           /* [EBP] addressing mode */

    // registers saved across function calls; ES joins the set when
    // CFG3eseqds is configured
    fregsaved = mBP | mBX | mSI | mDI;
    if (config.flags3 & CFG3eseqds)
        fregsaved |= mES;

    FLOATREGS  = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;
}
/********************************
 * Set up the code generator globals and size tables for I64 (64 bit)
 * code generation.
 */
void cod3_set64()
{
    // Patch the size tables: opcodes A0..A3 (MOV AL,mem / MOV RAX,mem /
    // MOV mem,AL / MOV mem,RAX) become T|5, and the two byte opcodes
    // 0F 80..0F 8F become W|T|6.
    for (unsigned op = 0xA0; op <= 0xA3; op++)
        inssize[op] = T|5;
    for (unsigned op = 0x80; op < 0x90; op++)
        inssize2[op] = W|T|6;

    BPRM = 5;                           // [RBP] addressing mode

    // registers saved across function calls
    fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES;

    FLOATREGS  = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;
    STACKALIGN = 16;

#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
    // On these targets the allocatable set is widened to include SI, DI
    // and R8..R15, and the same set is used for byte registers.
    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;
#endif
}
/*********************************
 * Pad the code segment with NOP bytes so that the next function starts
 * on an aligned offset.
 *  - OMFOBJ builds: only when optimizing for speed on a 486 or a
 *    PentiumPro and later, and only when fewer than 8 bytes are needed
 *    to reach the next 16 byte boundary.
 *  - all other builds: always pad to a 4 byte boundary.
 */
void cod3_align()
{
    static unsigned char nops[7] = { 0x90,0x90,0x90,0x90,0x90,0x90,0x90 };

#if OMFOBJ
    // nothing to do unless optimized for speed
    if (!(config.flags4 & CFG4speed))
        return;

    // Pick alignment based on CPU target
    if (config.target_cpu != TARGET_80486 &&
        config.target_cpu < TARGET_PentiumPro)
        return;

    // 486 does reads on 16 byte boundaries, so if we are near
    // such a boundary, align us to it
    unsigned pad = -Coffset & 15;
    if (pad < 8)
        Coffset += obj_bytes(cseg,Coffset,pad,nops);    // XCHG AX,AX
#else
    unsigned pad = -Coffset & 3;
    //dbg_printf("cod3_align Coffset %x nbytes %d\n",Coffset,pad);
    obj_bytes(cseg,Coffset,pad,nops);
#endif
}
/*****************************
* Given a type, return a mask of
* registers to hold that type.
* Input:
* tym type to find registers for (only tybasic(tym) is examined)
* tyf function type; only consulted for TYcfloat, and only in the
* TARGET_LINUX/OSX/FREEBSD/OPENBSD/SOLARIS builds
* Returns:
* the register mask; 0 for TYvoid and TYstruct.
* An unrecognized type trips assert(0).
*
* Several cases below fall through into the next one on purpose; each
* such place is marked.
*/
regm_t regmask(tym_t tym, tym_t tyf)
{
switch (tybasic(tym))
{
case TYvoid:
case TYstruct:
return 0;
case TYbool:
case TYwchar_t:
case TYchar16:
case TYchar:
case TYschar:
case TYuchar:
case TYshort:
case TYushort:
case TYint:
case TYuint:
#if JHANDLE
case TYjhandle:
#endif
case TYnullptr:
case TYnptr:
#if TARGET_SEGMENTED
case TYsptr:
case TYcptr:
#endif
return mAX;
case TYfloat:
case TYifloat:
if (I64)
return mXMM0;
if (config.exe & EX_flat)
return mST0;
// fall through: otherwise a float uses the same registers as TYlong
case TYlong:
case TYulong:
case TYdchar:
if (!I16)
return mAX;
// fall through: in 16 bit mode these need the DX:AX pair
#if TARGET_SEGMENTED
case TYfptr:
case TYhptr:
#endif
return mDX | mAX;
case TYcent:
case TYucent:
assert(I64);
return mDX | mAX;
#if TARGET_SEGMENTED
case TYvptr:
return mDX | mBX;
#endif
case TYdouble:
case TYdouble_alias:
case TYidouble:
if (I64)
return mXMM0;
if (config.exe & EX_flat)
return mST0;
return DOUBLEREGS;
case TYllong:
case TYullong:
return I64 ? mAX : (I32 ? mDX | mAX : DOUBLEREGS);
case TYldouble:
case TYildouble:
return mST0;
case TYcfloat:
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
// 32 bit TYnfunc functions on these targets return a cfloat in DX:AX
if (I32 && tybasic(tyf) == TYnfunc)
return mDX | mAX;
#endif
// fall through: otherwise handled like TYcdouble
case TYcdouble:
if (I64)
return mXMM0 | mXMM1;
// fall through: outside 64 bit mode handled like TYcldouble
case TYcldouble:
return mST01;
// SIMD vector types
case TYfloat4:
case TYdouble2:
case TYschar16:
case TYuchar16:
case TYshort8:
case TYushort8:
case TYlong4:
case TYulong4:
case TYllong2:
case TYullong2:
// vector types are fatal when config.fpxmmregs is not set:
// print a message and terminate the process
if (!config.fpxmmregs)
{ printf("SIMD operations not supported on this platform\n");
exit(1);
}
return mXMM0;
default:
// unknown type: print it in DEBUG builds, then fail the assertion
#if DEBUG
WRTYxx(tym);
#endif
assert(0);
return 0;
}
}
/*******************************
* Generate block exit code
* Dispatches on bl->BC (the block exit condition) and leaves the
* resulting instruction list in bl->Bcode.
* Input:
* bl block whose exit code is generated; bl->Belem is the
* block's expression, bl->Bsucc its successor list
* c code generated so far for the block (passed by reference
* and updated as code is appended)
* anyspill if non-zero, number of globsym.tab[] entries to examine
* for spilled symbols when adding the spill epilog code
* sflsave saved Sfl value for each symbol index, restored for
* spilled symbols before the epilog code is generated
* retsym set to the returned symbol when a BCretexp returns a
* register variable that is not in AX (optimized builds only)
* mfuncregsave value restored into mfuncreg for an optimized BCexit
*/
void outblkexitcode(block *bl, code*& c, int& anyspill, const char* sflsave, symbol** retsym, const regm_t mfuncregsave)
{
elem *e = bl->Belem;
block *nextb;
block *bs1,*bs2;
regm_t retregs = 0;
bool jcond;
switch (bl->BC) /* block exit condition */
{
case BCiftrue:
jcond = TRUE;
bs1 = list_block(bl->Bsucc);
bs2 = list_block(list_next(bl->Bsucc));
// If the first successor is the next block in sequence, invert the
// condition so that the conditional jump goes to the other successor
if (bs1 == bl->Bnext)
{ // Swap bs1 and bs2
block *btmp;
jcond ^= 1;
btmp = bs1;
bs1 = bs2;
bs2 = btmp;
}
c = cat(c,logexp(e,jcond,FLblock,(code *) bs1));
nextb = bs2;
bl->Bcode = NULL;
// L2: common tail shared by most cases: append a JMP to nextb unless
// nextb is the block that follows bl, then store the code in bl->Bcode
L2:
if (nextb != bl->Bnext)
{ if (configv.addlinenumbers && bl->Bsrcpos.Slinnum &&
!(funcsym_p->ty() & mTYnaked))
cgen_linnum(&c,bl->Bsrcpos);
assert(!(bl->Bflags & BFLepilog));
c = cat(c,genjmp(CNIL,JMP,FLblock,nextb));
}
bl->Bcode = cat(bl->Bcode,c);
break;
case BCjmptab:
case BCifthen:
case BCswitch:
assert(!(bl->Bflags & BFLepilog));
doswitch(bl); /* hide messy details */
// doswitch() stored its code in bl->Bcode; put the code generated so far in front of it
bl->Bcode = cat(c,bl->Bcode);
break;
#if MARS
case BCjcatch:
// Mark all registers as destroyed. This will prevent
// register assignments to variables used in catch blocks.
c = cat(c,getregs((I32 | I64) ? allregs : (ALLREGS | mES)));
#if 0 && TARGET_LINUX
if (config.flags3 & CFG3pic && !(allregs & mBX))
{
c = cat(c, cod3_load_got());
}
#endif
goto case_goto;
#endif
#if SCPP
case BCcatch:
// Mark all registers as destroyed. This will prevent
// register assignments to variables used in catch blocks.
c = cat(c,getregs(allregs | mES));
#if 0 && TARGET_LINUX
if (config.flags3 & CFG3pic && !(allregs & mBX))
{
c = cat(c, cod3_load_got());
}
#endif
goto case_goto;
case BCtry:
usednteh |= EHtry;
if (config.flags2 & CFG2seh)
usednteh |= NTEHtry;
goto case_goto;
#endif
case BCgoto:
nextb = list_block(bl->Bsucc);
// A goto that leaves its try scope (and does not target a BC_finally
// block) needs unwinding code before the jump
if ((funcsym_p->Sfunc->Fflags3 & Fnteh ||
(MARS /*&& config.flags2 & CFG2seh*/)) &&
bl->Btry != nextb->Btry &&
nextb->BC != BC_finally)
{ int toindex;
int fromindex;
bl->Bcode = NULL;
c = gencodelem(c,e,&retregs,TRUE);
// Scope index of the destination, -1 if it is not inside any try block
toindex = nextb->Btry ? nextb->Btry->Bscope_index : -1;
assert(bl->Btry);
fromindex = bl->Btry->Bscope_index;
#if MARS
if (toindex + 1 == fromindex)
{ // Simply call __finally
if (bl->Btry &&
list_block(list_next(bl->Btry->Bsucc))->BC == BCjcatch)
{
goto L2;
}
}
#endif
if (config.flags2 & CFG2seh)
c = cat(c,nteh_unwind(0,toindex));
#if MARS && (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS)
else if (toindex + 1 <= fromindex)
{
//c = cat(c, linux_unwind(0, toindex));
block *bt;
//printf("B%d: fromindex = %d, toindex = %d\n", bl->Bdfoidx, fromindex, toindex);
// Walk outwards through the enclosing try blocks until the
// destination scope is reached, calling each finally on the way
bt = bl;
while ((bt = bt->Btry) != NULL && bt->Bscope_index != toindex)
{ block *bf;
//printf("\tbt->Bscope_index = %d, bt->Blast_index = %d\n", bt->Bscope_index, bt->Blast_index);
bf = list_block(list_next(bt->Bsucc));
// Only look at try-finally blocks
if (bf->BC == BCjcatch)
continue;
// Skip the finally block that is itself the jump destination
if (bf == nextb)
continue;
//printf("\tbf = B%d, nextb = B%d\n", bf->Bdfoidx, nextb->Bdfoidx);
// Also skip it when the destination is an empty goto block leading straight to it
if (nextb->BC == BCgoto &&
!nextb->Belem &&
bf == list_block(nextb->Bsucc))
continue;
// call __finally
code *cs;
code *cr;
int nalign = 0;
gensaverestore(retregs,&cs,&cr);
// Keep the stack 16 byte aligned across the call: npush counts the
// saved retregs plus one extra REGSIZE slot
if (STACKALIGN == 16)
{ int npush = (numbitsset(retregs) + 1) * REGSIZE;
if (npush & (STACKALIGN - 1))
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
cs = genc2(cs,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
if (I64)
code_orrex(cs, REX_W);
}
}
cs = genc(cs,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bf->Bsucc));
if (nalign)
{ cs = genc2(cs,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
if (I64)
code_orrex(cs, REX_W);
}
c = cat3(c,cs,cr);
}
}
#endif
goto L2;
}
// case_goto: generate the block's expression, add any spill epilog
// code, then jump to the first successor via L3/L2
case_goto:
c = gencodelem(c,e,&retregs,TRUE);
if (anyspill)
{ // Add in the epilog code
code *cstore = NULL;
code *cload = NULL;
for (int i = 0; i < anyspill; i++)
{ symbol *s = globsym.tab[i];
if (s->Sflags & SFLspill &&
vec_testbit(dfoidx,s->Srange))
{
s->Sfl = sflsave[i]; // undo block register assignments
cgreg_spillreg_epilog(bl,s,&cstore,&cload);
}
}
c = cat3(c,cstore,cload);
}
L3:
bl->Bcode = NULL;
nextb = list_block(bl->Bsucc);
goto L2;
case BC_try:
if (config.flags2 & CFG2seh)
{ usednteh |= NTEH_try;
nteh_usevars();
}
else
usednteh |= EHtry;
goto case_goto;
case BC_finally:
// Mark all registers as destroyed. This will prevent
// register assignments to variables used in finally blocks.
// NOTE(review): getregs() is called inside assert(); if assert() can be
// compiled out in some build, the register marking is lost -- confirm
// that assert is always active for this file.
assert(!getregs(allregs));
assert(!e);
assert(!bl->Bcode);
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (config.flags3 & CFG3pic)
{
// Position independent code: CALL the finally code and then JMP to
// the second successor, instead of pushing a code address
int nalign = 0;
if (STACKALIGN == 16)
{ nalign = STACKALIGN - REGSIZE;
c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
if (I64)
code_orrex(c, REX_W);
}
// CALL bl->Bsucc
c = genc(c,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bl->Bsucc));
if (nalign)
{ c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
if (I64)
code_orrex(c, REX_W);
}
// JMP list_next(bl->Bsucc)
nextb = list_block(list_next(bl->Bsucc));
goto L2;
}
else
#endif
{
// Generate a PUSH of the address of the successor to the
// corresponding BC_ret
//assert(list_block(list_next(bl->Bsucc))->BC == BC_ret);
// PUSH &succ
c = genc(c,0x68,0,0,0,FLblock,(targ_size_t)list_block(list_next(bl->Bsucc)));
nextb = list_block(bl->Bsucc);
goto L2;
}
case BC_ret:
c = gencodelem(c,e,&retregs,TRUE);
bl->Bcode = gen1(c,0xC3); // RET
break;
#if NTEXCEPTIONS
case BC_except:
assert(!e);
usednteh |= NTEH_except;
c = cat(c,nteh_setsp(0x8B));
getregs(allregs);
goto L3;
case BC_filter:
c = cat(c,nteh_filter(bl));
// Mark all registers as destroyed. This will prevent
// register assignments to variables used in filter blocks.
getregs(allregs);
retregs = regmask(e->Ety, TYnfunc);
c = gencodelem(c,e,&retregs,TRUE);
bl->Bcode = gen1(c,0xC3); // RET
break;
#endif
case BCretexp:
retregs = regmask(e->Ety, funcsym_p->ty());
// For the final load into the return regs, don't set regcon.used,
// so that the optimizer can potentially use retregs for register
// variable assignments.
if (config.flags4 & CFG4optimized)
{ regm_t usedsave;
c = cat(c,docommas(&e));
usedsave = regcon.used;
if (EOP(e))
c = gencodelem(c,e,&retregs,TRUE);
else
{
if (e->Eoper == OPconst)
regcon.mvar = 0;
c = gencodelem(c,e,&retregs,TRUE);
regcon.used = usedsave;
// Report a returned register variable that is not in AX back to the caller
if (e->Eoper == OPvar)
{ symbol *s = e->EV.sp.Vsym;
if (s->Sfl == FLreg && s->Sregm != mAX)
*retsym = s;
}
}
}
else
{
// BCret and BCexit enter here, inside the else branch, and share the
// unoptimized BCretexp path and everything that follows it
case BCret:
case BCexit:
c = gencodelem(c,e,&retregs,TRUE);
}
bl->Bcode = c;
if (retregs == mST0)
{ assert(stackused == 1);
pop87(); // account for return value
}
else if (retregs == mST01)
{ assert(stackused == 2);
pop87();
pop87(); // account for return value
}
if (bl->BC == BCexit && config.flags4 & CFG4optimized)
mfuncreg = mfuncregsave;
// Returning from inside try blocks: run the enclosing finally code first
if (MARS || usednteh & NTEH_try)
{ block *bt;
bt = bl;
while ((bt = bt->Btry) != NULL)
{ block *bf;
bf = list_block(list_next(bt->Bsucc));
#if MARS
// Only look at try-finally blocks
if (bf->BC == BCjcatch)
{
continue;
}
#endif
if (config.flags2 & CFG2seh)
{
if (bt->Bscope_index == 0)
{
// call __finally
code *cs;
code *cr;
c = cat(c,nteh_gensindex(-1));
gensaverestore(retregs,&cs,&cr);
cs = genc(cs,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bf->Bsucc));
bl->Bcode = cat3(c,cs,cr);
}
else
bl->Bcode = cat(c,nteh_unwind(retregs,~0));
// With SEH only the innermost try-finally is handled here; this
// break leaves the while loop
break;
}
else
{
// call __finally
code *cs;
code *cr;
int nalign = 0;
gensaverestore(retregs,&cs,&cr);
if (STACKALIGN == 16)
{ int npush = (numbitsset(retregs) + 1) * REGSIZE;
if (npush & (STACKALIGN - 1))
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
cs = genc2(cs,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
if (I64)
code_orrex(cs, REX_W);
}
}
// CALL bf->Bsucc
cs = genc(cs,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bf->Bsucc));
if (nalign)
{ cs = genc2(cs,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
if (I64)
code_orrex(cs, REX_W);
}
bl->Bcode = c = cat3(c,cs,cr);
}
}
}
break;
#if SCPP || MARS
case BCasm:
assert(!e);
// Mark destroyed registers
assert(!c);
c = cat(c,getregs(iasm_regs(bl)));
if (bl->Bsucc)
{ nextb = list_block(bl->Bsucc);
if (!bl->Bnext)
goto L2;
// A JMP is needed when the successor is not the next block, unless the
// next block is an empty goto that leads to the same successor
if (nextb != bl->Bnext &&
bl->Bnext &&
!(bl->Bnext->BC == BCgoto &&
!bl->Bnext->Belem &&
nextb == list_block(bl->Bnext->Bsucc)))
{ code *cl;
// See if already have JMP at end of block
cl = code_last(bl->Bcode);
if (!cl || cl->Iop != JMP)
goto L2; // add JMP at end of block
}
}
break;
#endif
default:
#ifdef DEBUG
printf("bl->BC = %d\n",bl->BC);
#endif
assert(0);
}
}
/*******************************
* Generate code for blocks ending in a switch statement.
* Take BCswitch and decide on
* BCifthen use if - then code
* BCjmptab index into jump table
* BCswitch search table for match
* Input:
* b block ending in the switch; b->Belem is the switch
* expression, b->BS.Bswitch points to the case count
* followed by the case values, b->Bsucc lists the
* default block first and then one block per case
* Output:
* b->BC updated to the strategy chosen
* b->Bcode generated code
* b->Btablesize size of the table to be emitted (BCjmptab and BCswitch)
*/
void doswitch(block *b)
{ code *cc,*c,*ce;
regm_t retregs;
unsigned ncases,n,reg,reg2,rm;
targ_llong vmax,vmin,val;
targ_llong *p;
list_t bl;
elem *e;
tym_t tys;
int sz;
unsigned char dword;
unsigned char mswsame;
#if LONGLONG
targ_ulong msw;
#else
unsigned msw;
#endif
e = b->Belem;
elem_debug(e);
cc = docommas(&e);
cgstate.stackclean++;
tys = tybasic(e->Ety);
sz = tysize[tys];
// dword: the switch value is twice the register size and so occupies a register pair
dword = (sz == 2 * REGSIZE);
mswsame = 1; // assume all msw's are the same
p = b->BS.Bswitch; /* pointer to case data */
assert(p);
ncases = *p++; /* number of cases */
vmax = MINLL; // smallest possible llong
vmin = MAXLL; // largest possible llong
// NOTE(review): msw is only assigned inside this loop, so with ncases == 0
// it would be read uninitialized below when dword && mswsame --
// presumably callers never pass an empty case list; confirm.
for (n = 0; n < ncases; n++) // find max and min case values
{ val = *p++;
if (val > vmax) vmax = val;
if (val < vmin) vmin = val;
if (REGSIZE == 2)
{
unsigned short ms = (val >> 16) & 0xFFFF;
if (n == 0)
msw = ms;
else if (msw != ms)
mswsame = 0;
}
else // REGSIZE == 4
{
targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
if (n == 0)
msw = ms;
else if (msw != ms)
mswsame = 0;
}
}
// Rewind p to the first case value
p -= ncases;
//dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);
if (I64)
{ // For now, just generate basic if-then sequence to get us running
retregs = ALLREGS;
b->BC = BCifthen;
c = scodelem(e,&retregs,0,TRUE);
assert(!dword); // 128 bit switches not supported
reg = findreg(retregs); // reg that result is in
bl = b->Bsucc;
for (n = 0; n < ncases; n++)
{ code *cx;
val = *p;
if (sz == 4)
cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,val
else if (sz == 8)
{
if (val == (int)val) // if val is a 64 bit value sign-extended from 32 bits
{
cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,value32
cx->Irex |= REX_W; // 64 bit operand
}
else
{ unsigned sreg;
// MOV sreg,value64
cx = regwithvalue(CNIL, ALLREGS & ~mask[reg], val, &sreg, 64);
cx = genregs(cx,0x3B,reg,sreg); // CMP reg,sreg
code_orrex(cx, REX_W);
}
}
else
// only 4 and 8 byte switch values are handled in 64 bit mode
assert(0);
bl = list_next(bl);
genjmp(cx,JE,FLblock,list_block(bl)); // JE caseaddr
c = cat(c,cx);
p++;
}
if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */
c = cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc)));
ce = NULL;
}
// Need to do research on MACHOBJ to see about better methods
else if (MACHOBJ || ncases <= 3)
{ // generate if-then sequence
retregs = ALLREGS;
// L1 is also reached from the switch table branch below (with retregs
// already set to mAX) when an if-then sequence is preferred there
L1:
b->BC = BCifthen;
c = scodelem(e,&retregs,0,TRUE);
if (dword)
{ reg = findreglsw(retregs);
reg2 = findregmsw(retregs);
}
else
reg = findreg(retregs); /* reg that result is in */
bl = b->Bsucc;
// All cases share one most significant word: test it once up front
if (dword && mswsame)
{ /* CMP reg2,MSW */
c = genc2(c,0x81,modregrm(3,7,reg2),msw);
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
}
for (n = 0; n < ncases; n++)
{ code *cnext = CNIL;
/* CMP reg,casevalue */
c = cat(c,ce = genc2(CNIL,0x81,modregrm(3,7,reg),(targ_int)*p));
if (dword && !mswsame)
{
// The low words matched only if we get past the JNE; then compare
// the high words too. cnext is a NOP used as the skip target.
cnext = gennop(CNIL);
genjmp(ce,JNE,FLcode,(block *) cnext);
genc2(ce,0x81,modregrm(3,7,reg2),MSREG(*p));
}
bl = list_next(bl);
/* JE caseaddr */
genjmp(ce,JE,FLblock,list_block(bl));
c = cat(c,cnext);
p++;
}
if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */
c = cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc)));
ce = NULL;
}
#if TARGET_WINDOS // try and find relocation to support this
else if ((targ_ullong)(vmax - vmin) <= ncases * 2) // then use jump table
{ int modify;
b->BC = BCjmptab;
retregs = IDXREGS;
if (dword)
retregs |= mMSW;
// The index register gets modified if vmin has to be subtracted, or
// (when not I32) by the SHL below
modify = (vmin || !I32);
c = scodelem(e,&retregs,0,!modify);
reg = findreg(retregs & IDXREGS); /* reg that result is in */
if (dword)
reg2 = findregmsw(retregs);
if (modify)
{
assert(!(retregs & regcon.mvar));
c = cat(c,getregs(retregs));
}
if (vmin) /* if there is a minimum */
{
c = genc2(c,0x81,modregrm(3,5,reg),vmin); /* SUB reg,vmin */
if (dword)
{ genc2(c,0x81,modregrm(3,3,reg2),MSREG(vmin)); // SBB reg2,vmin
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
}
}
else if (dword)
{ c = gentstreg(c,reg2); // TEST reg2,reg2
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
}
if (vmax - vmin != REGMASK) /* if there is a maximum */
{ /* CMP reg,vmax-vmin */
c = genc2(c,0x81,modregrm(3,7,reg),vmax-vmin);
genjmp(c,JA,FLblock,list_block(b->Bsucc)); /* JA default */
}
if (!I32)
c = gen2(c,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */
if (I32)
{
ce = genc1(CNIL,0xFF,modregrm(0,4,4),FLswitch,0); /* JMP [CS:]disp[idxreg*4] */
ce->Isib = modregrm(2,reg,5);
}
else
{ rm = getaddrmode(retregs) | modregrm(0,4,0);
ce = genc1(CNIL,0xFF,rm,FLswitch,0); /* JMP [CS:]disp[idxreg] */
}
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
ce->Iflags |= flags; // segment override
ce->IEV1.Vswitch = b;
// One pointer sized table entry for every value in vmin..vmax
b->Btablesize = (int) (vmax - vmin + 1) * tysize[TYnptr];
}
#endif
else /* else use switch table (BCswitch) */
{ targ_size_t disp;
int mod;
code *esw;
code *ct;
retregs = mAX; /* SCASW requires AX */
if (dword)
retregs |= mDX;
// Few cases, or optimizing for speed: prefer the if-then sequence
else if (ncases <= 6 || config.flags4 & CFG4speed)
goto L1;
c = scodelem(e,&retregs,0,TRUE);
if (dword && mswsame)
{ /* CMP DX,MSW */
c = genc2(c,0x81,modregrm(3,7,DX),msw);
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
}
ce = getregs(mCX|mDI);
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (config.flags3 & CFG3pic)
{ // Add in GOT
code *cx;
code *cgot;
ce = cat(ce, getregs(mDX));
cx = genc2(NULL,CALL,0,0); // CALL L1
gen1(cx, 0x58 + DI); // L1: POP EDI
// ADD EDI,_GLOBAL_OFFSET_TABLE_+3
symbol *gotsym = elfobj_getGOTsym();
cgot = gencs(CNIL,0x81,modregrm(3,0,DI),FLextern,gotsym);
cgot->Iflags = CFoff;
cgot->IEVoffset2 = 3;
makeitextern(gotsym);
genmovreg(cgot, DX, DI); // MOV EDX, EDI
// ADD EDI,offset of switch table
esw = gencs(CNIL,0x81,modregrm(3,0,DI),FLswitch,NULL);
esw->IEV2.Vswitch = b;
esw = cat3(cx, cgot, esw);
}
else
#endif
{
// MOV DI,offset of switch table
esw = gencs(CNIL,0xC7,modregrm(3,0,DI),FLswitch,NULL);
esw->IEV2.Vswitch = b;
}
ce = cat(ce,esw);
movregconst(ce,CX,ncases,0); /* MOV CX,ncases */
/* The switch table will be accessed through ES:DI.
* Therefore, load ES with proper segment value.
*/
if (config.flags3 & CFG3eseqds)
{ assert(!(config.flags & CFGromable));
ce = cat(ce,getregs(mCX)); // allocate CX
}
else
{
ce = cat(ce,getregs(mES|mCX)); // allocate ES and CX
gen1(ce,(config.flags & CFGromable) ? 0x0E : 0x1E); // PUSH CS/DS
gen1(ce,0x07); // POP ES
}
disp = (ncases - 1) * intsize; /* displacement to jump table */
if (dword && !mswsame)
{ code *cloop;
/* Build the following:
L1: SCASW
JNE L2
CMP DX,[CS:]disp[DI]
L2: LOOPNE L1
*/
mod = (disp > 127) ? 2 : 1; /* displacement size */
// The LOOPNE displacement is hard coded from the sizes of the
// instructions it jumps back over, so it depends on mod and on
// whether a CS: override byte is present
cloop = genc2(CNIL,0xE0,0,-7 - mod -
((config.flags & CFGromable) ? 1 : 0)); /* LOOPNE scasw */
ce = gen1(ce,0xAF); /* SCASW */
code_orflag(ce,CFtarg2); // target of jump
genjmp(ce,JNE,FLcode,(block *) cloop); /* JNE loop */
/* CMP DX,[CS:]disp[DI] */
ct = genc1(CNIL,0x39,modregrm(mod,DX,5),FLconst,disp);
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
ct->Iflags |= flags; // possible seg override
ce = cat3(ce,ct,cloop);
disp += ncases * intsize; /* skip over msw table */
}
else
{
ce = gen1(ce,0xF2); /* REPNE */
gen1(ce,0xAF); /* SCASW */
}
genjmp(ce,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
mod = (disp > 127) ? 2 : 1; /* 1 or 2 byte displacement */
if (config.flags & CFGromable)
gen1(ce,SEGCS); /* table is in code segment */
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (config.flags3 & CFG3pic)
{ // ADD EDX,(ncases-1)*2[EDI]
ct = genc1(CNIL,0x03,modregrm(mod,DX,7),FLconst,disp);
// JMP EDX
gen2(ct,0xFF,modregrm(3,4,DX));
}
else
#endif
{ // JMP (ncases-1)*2[DI]
ct = genc1(CNIL,0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
ct->Iflags |= flags;
}
ce = cat(ce,ct);
// Value table (plus msw table, if any) followed by ncases code addresses
b->Btablesize = disp + intsize + ncases * tysize[TYnptr];
}
// Comma expression code, then expression/compare code, then the table jump (if any)
b->Bcode = cat3(cc,c,ce);
//assert(b->Bcode);
cgstate.stackclean--;
}
/******************************
 * Emit the data block for a jump table (BCjmptab).
 * One code segment reference is written for every value from the
 * smallest to the largest case value; values that have no case of
 * their own ('holes') refer to the default block.
 * The table goes into the code segment when CFGromable is set,
 * otherwise into JMPSEG.
 */
void outjmptab(block *b)
{
    const bool inCodeSeg = (config.flags & CFGromable) != 0;
    targ_size_t *poffset = inCodeSeg ? &Coffset : &JMPOFF;

    targ_llong *cases = b->BS.Bswitch;          // case count, then the case values
    const unsigned ncases = *cases++;

    // Determine the range of values the table has to span
    targ_llong highest = MINLL;                 // smallest possible llong
    targ_llong lowest = MAXLL;                  // largest possible llong
    for (unsigned k = 0; k < ncases; k++)
    {
        const targ_llong v = cases[k];
        if (v > highest)
            highest = v;
        if (v < lowest)
            lowest = v;
    }

    const int jmpseg = inCodeSeg ? cseg : JMPSEG;

    // Emit whatever alignment bytes are necessary
    const targ_size_t alignbytes = align(0,*poffset) - *poffset;
    obj_lidata(jmpseg,*poffset,alignbytes);

    const targ_size_t deflt = list_block(b->Bsucc)->Boffset;    // default address
    assert(lowest <= highest);

    targ_llong u = lowest;
    for (;;)
    {
        // Use the address of the matching case, or the default for a hole
        targ_size_t target = deflt;
        for (unsigned k = 0; k < ncases; k++)
        {
            if (cases[k] == u)
            {
                target = list_block(list_nth(b->Bsucc,k + 1))->Boffset;
                break;
            }
        }
        reftocodseg(jmpseg,*poffset,target);
        *poffset += tysize[TYnptr];

        // Test before incrementing, so a largest value of ~0 terminates too
        if (u == highest)
            break;
        u++;
    }
}
/******************************
* Output data block for a switch table.
* Two consecutive tables, the first is the case value table, the
* second is the address table.
* When b->Btablesize shows that room was reserved for it, a table of
* the most significant words of the case values is written between
* the two.
* Input:
* b block for the switch; b->BS.Bswitch points to the case
* count followed by the case values, b->Bsucc lists the
* default block first and then one block per case
*/
void outswitab(block *b)
{ unsigned ncases,n;
targ_llong *p;
targ_size_t val;
targ_size_t alignbytes,*poffset;
int seg; /* target segment for table */
list_t bl;
unsigned sz;
targ_size_t offset;
//printf("outswitab()\n");
p = b->BS.Bswitch; /* pointer to case data */
ncases = *p++; /* number of cases */
// The table goes into the code segment for romable code, else into JMPSEG
if (config.flags & CFGromable)
{ poffset = &Coffset;
assert(cseg == CODE);
seg = cseg;
}
else
{
poffset = &JMPOFF;
seg = JMPSEG;
}
// 'offset' tracks where *poffset is expected to be; the asserts below
// check the two against each other after each table
offset = *poffset;
alignbytes = align(0,*poffset) - *poffset;
obj_lidata(seg,*poffset,alignbytes); /* any alignment bytes necessary */
assert(*poffset == offset + alignbytes);
sz = intsize;
for (n = 0; n < ncases; n++) /* send out value table */
{
//printf("\tcase %d, offset = x%x\n", n, *poffset);
// For OMF the returned byte count is added to *poffset here. For other
// formats nothing is added, yet the assert after the loop still expects
// *poffset to have advanced -- presumably obj_bytes() advances it
// itself there; confirm.
// Only the first sz bytes of each targ_llong case value are written.
#if OMFOBJ
*poffset +=
#endif
obj_bytes(seg,*poffset,sz,p);
p++;
}
offset += alignbytes + sz * ncases;
assert(*poffset == offset);
// A table size of ncases * (2 * REGSIZE + pointer size) means that a
// most significant word table is part of the layout
if (b->Btablesize == ncases * (REGSIZE * 2 + tysize[TYnptr]))
{
/* Send out MSW table */
p -= ncases;
for (n = 0; n < ncases; n++)
{ val = MSREG(*p);
p++;
#if OMFOBJ
*poffset +=
#endif
obj_bytes(seg,*poffset,REGSIZE,&val);
}
offset += REGSIZE * ncases;
assert(*poffset == offset);
}
bl = b->Bsucc;
for (n = 0; n < ncases; n++) /* send out address table */
// list_next() comes first, so the leading (default) successor is skipped
{ bl = list_next(bl);
reftocodseg(seg,*poffset,list_block(bl)->Boffset);
*poffset += tysize[TYnptr];
}
assert(*poffset == offset + ncases * tysize[TYnptr]);
}
/*****************************
* Return a jump opcode relevant to the elem for a JMP TRUE.
* Input:
* e expression whose result controls the jump (must not be NULL)
* Returns:
* a conditional jump opcode in the low byte (the final assert checks
* that its high nibble is 7). For floating point compares the value
* may also have JP or JNP placed in bits 8..15 (see XP and XNP below);
* presumably the caller emits that parity jump as well -- confirm
* against the callers.
*/
int jmpopcode(elem *e)
{ tym_t tym;
int zero,i,jp,op;
// Integer jump table, indexed as [unsigned][compare against zero][op - OPle]
static const char jops[][2][6] =
{ /* <= > < >= == != <=0 >0 <0 >=0 ==0 !=0 */
{ {JLE,JG ,JL ,JGE,JE ,JNE},{JLE,JG ,JS ,JNS,JE ,JNE} }, /* signed */
{ {JBE,JA ,JB ,JAE,JE ,JNE},{JE ,JNE,JB ,JAE,JE ,JNE} }, /* unsigned */
#if 0
{ {JLE,JG ,JL ,JGE,JE ,JNE},{JLE,JG ,JL ,JGE,JE ,JNE} }, /* real */
{ {JBE,JA ,JB ,JAE,JE ,JNE},{JBE,JA ,JB ,JAE,JE ,JNE} }, /* 8087 */
{ {JA ,JBE,JAE,JB ,JE ,JNE},{JBE,JA ,JB ,JAE,JE ,JNE} }, /* 8087 R */
#endif
};
// XP and XNP put a JP / JNP opcode into bits 8..15 of a table entry
#define XP (JP << 8)
#define XNP (JNP << 8)
// Floating point jump table, indexed by [op - OPle]
static const unsigned jfops[1][26] =
/* le gt lt ge eqeq ne unord lg leg ule ul uge */
{
{ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP, JNE,JNP, JBE,JC,XP|JAE,
/* ug ue ngt nge nlt nle ord nlg nleg nule nul nuge nug nue */
XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP, JA, JNC,XNP|JB, XNP|JBE,JNE }, /* 8087 */
};
assert(e);
// Skip down to the operand that actually produces the result
while (e->Eoper == OPcomma ||
/* The !EOP(e->E1) is to line up with the case in cdeq() where */
/* we decide if mPSW is passed on when evaluating E2 or not. */
(e->Eoper == OPeq && !EOP(e->E1)))
e = e->E2; /* right operand determines it */
op = e->Eoper;
if (e->Ecount != e->Ecomsub) // comsubs just get Z bit set
return JNE;
if (!OTrel(op)) // not relational operator
{
tym_t tymx = tybasic(e->Ety);
// Inline 8087 results of the listed types, and any floating OPind,
// get the JP-augmented JNE
if (tyfloating(tymx) && config.inline8087 &&
(tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
tymx == TYcdouble || tymx == TYcfloat ||
op == OPind))
{
return XP|JNE;
}
// Operators in the range OPbt..OPbts jump on carry, everything else on not-zero
return (op >= OPbt && op <= OPbts) ? JC : JNE;
}
// zero: the right operand is a constant for which boolres() is false
if (e->E2->Eoper == OPconst)
zero = !boolres(e->E2);
else
zero = 0;
tym = e->E1->Ety;
if (tyfloating(tym))
#if 1
{ i = 0;
if (config.inline8087)
{ i = 1;
#if 1
#define NOSAHF (I64 || config.fpxmmregs)
// Decide whether the relational operator must be swapped before it is
// looked up in jfops[]
if (rel_exception(op) || config.flags4 & CFG4fastfloat)
{
if (zero)
{
if (NOSAHF)
op = swaprel(op);
}
else if (NOSAHF)
op = swaprel(op);
else if (cmporder87(e->E2))
op = swaprel(op);
else
;
}
else
{
if (zero && config.target_cpu < TARGET_80386)
;
else
op = swaprel(op);
}
#else
if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
op = swaprel(op);
else if (!zero &&
(cmporder87(e->E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
/* compare is reversed */
op = swaprel(op);
#endif
}
// Floating point always uses jfops[]; i is only reported by the debug printf at L1
jp = jfops[0][op - OPle];
goto L1;
}
#else
i = (config.inline8087) ? (3 + cmporder87(e->E2)) : 2;
#endif
else if (tyuns(tym) || tyuns(e->E2->Ety))
i = 1;
else if (tyintegral(tym) || typtr(tym))
i = 0;
else
{
#if DEBUG
elem_print(e);
WRTYxx(tym);
#endif
assert(0);
}
jp = jops[i][zero][op - OPle]; /* table starts with OPle */
L1:
#if DEBUG
if ((jp & 0xF0) != 0x70)
WROP(op),
printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp);
#endif
// The low byte must be one of the 0x70..0x7F conditional jump opcodes
assert((jp & 0xF0) == 0x70);
return jp;
}
/**********************************
* Append code to *pc which validates pointer described by
* addressing mode in *pcs. Modify addressing mode in *pcs.
* Input:
* pc code list to append to; *pc is replaced by the extended list
* pcs instruction whose addressing mode describes the pointer;
* on return its addressing mode has been rewritten to 0[reg]
* keepmsk mask of registers we must not destroy or use
* if (keepmsk & RMstore), this will be only a store operation
* into the lvalue
* Not available in 64 bit mode (asserts !I64). Does nothing for non-16
* bit code when the instruction carries a segment override.
*/
void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk)
{ code *c;
code *cs2;
unsigned char rm,sib;
unsigned reg;
unsigned flagsave;
unsigned opsave;
regm_t idxregs;
regm_t tosave;
regm_t used;
int i;
assert(!I64);
if (!I16 && pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
return; // not designed to deal with 48 bit far pointers
c = *pc;
rm = pcs->Irm;
assert(!(rm & 0x40)); // no disp8 or reg addressing modes
// If the addressing mode is already a register
reg = rm & 7;
if (I16)
{ static const unsigned char imode[8] = { BP,BP,BP,BP,SI,DI,BP,BX };
reg = imode[reg]; // convert [SI] to SI, etc.
}
idxregs = mask[reg];
// The address has to be materialized with LEA when the mode carries a
// displacement that is not a zero FLoffset, or when the base register
// is not one of ALLREGS
if ((rm & 0x80 && (pcs->IFL1 != FLoffset || pcs->IEV1.Vuns)) ||
!(idxregs & ALLREGS)
)
{
// Load the offset into a register, so we can push the address
idxregs = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
assert(idxregs);
c = cat(c,allocreg(&idxregs,&reg,TYoffset));
// Temporarily turn *pcs into an LEA, emit a copy of it, then restore
// its opcode and flags (the reg field OR'ed into Irm is not restored
// here; setaddrmode() below rewrites the addressing mode)
opsave = pcs->Iop;
flagsave = pcs->Iflags;
pcs->Iop = 0x8D;
pcs->Irm |= modregrm(0,reg,0);
pcs->Iflags &= ~(CFopsize | CFss | CFes | CFcs); // no prefix bytes needed
c = gen(c,pcs); // LEA reg,EA
pcs->Iflags = flagsave;
pcs->Iop = opsave;
}
// registers destroyed by the function call
//used = (mBP | ALLREGS | mES) & ~fregsaved;
used = 0; // much less code generated this way
cs2 = CNIL;
// With used == 0, tosave is always 0 here, so the save/restore loop
// below never generates anything and getregs(used) is given an empty mask
tosave = used & (keepmsk | idxregs);
for (i = 0; tosave; i++)
{ regm_t mi = mask[i];
assert(i < REGMAX);
if (mi & tosave) /* i = register to save */
{
int push,pop;
stackchanged = 1;
if (i == ES)
{ push = 0x06;
pop = 0x07;
}
else
{ push = 0x50 + i;
pop = push | 8;
}
c = gen1(c,push); // PUSH i
// The POPs are prepended so that they end up in reverse order of the PUSHes
cs2 = cat(gen1(CNIL,pop),cs2); // POP i
tosave &= ~mi;
}
}
// For 16 bit models, push a far pointer
if (I16)
{ int segreg;
// Pick the PUSH opcode for the segment register that the override selects
switch (pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
{ case CFes: segreg = 0x06; break;
case CFss: segreg = 0x16; break;
case CFcs: segreg = 0x0E; break;
case 0: segreg = 0x1E; break; // DS
default:
assert(0);
}
// See if we should default to SS:
// (Happens when BP is part of the addressing mode)
if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
rm & 2 && (rm & 7) != 7)
{ segreg = 0x16;
if (config.wflags & WFssneds)
pcs->Iflags |= CFss; // because BP won't be there anymore
}
c = gen1(c,segreg); // PUSH segreg
}
c = gen1(c,0x50 + reg); // PUSH reg
// Rewrite the addressing mode in *pcs so it is just 0[reg]
setaddrmode(pcs, idxregs);
pcs->IFL1 = FLoffset;
pcs->IEV1.Vuns = 0;
// Call the validation function
{
makeitextern(rtlsym[RTLSYM_PTRCHK]);
used &= ~(keepmsk | idxregs); // regs destroyed by this exercise
c = cat(c,getregs(used));
// CALL __ptrchk
// (opcode 0x9A is used instead of CALL when LARGECODE is set)
gencs(c,(LARGECODE) ? 0x9A : CALL,0,FLfunc,rtlsym[RTLSYM_PTRCHK]);
}
// Append the restoring POPs (none while used == 0)
*pc = cat(c,cs2);
}
/***********************************
 * Decide whether BP is free to serve as a general purpose register
 * in the current function (funcsym_p).
 * Note parallels between this routine and prolog().
 * Side effect: once the cheap checks have passed, stackoffsets(0) is
 * called and localsize is set to Aoffset (an estimate only).
 * Returns:
 *      0       BP can't be used, it is needed for the frame
 *      mBP     BP can be used
 */
regm_t cod3_useBP()
{
    // Note that DOSX memory model cannot use EBP as a general purpose
    // register, as SS != DS.
    if (!(config.exe & EX_flat))
        return 0;
    if (config.flags & (CFGalwaysframe | CFGnoebp))
        return 0;

    if (anyiasm)
        return 0;

    const tym_t functype = funcsym_p->ty();
    if (functype & mTYnaked)            // no prolog/epilog for this function
        return 0;

    if (funcsym_p->Sfunc->Fflags3 & Ffakeeh)
        return 0;                       // need consistent stack frame

    const tym_t basictype = tybasic(functype);
    if (basictype == TYifunc)
        return 0;

    stackoffsets(0);
    localsize = Aoffset;                // an estimate only

    const bool frameNeeded =
        !(config.flags4 & CFG4speed) ||
        config.target_cpu < TARGET_Pentium ||
        tyfarfunc(basictype) ||
        config.flags & CFGstack ||
        localsize >= 0x100 ||           // arbitrary value < 0x1000
        (usednteh & ~NTEHjmonitor) ||
        usedalloca;

    return frameNeeded ? 0 : mBP;
}
/***************************************
 * Gen code for OPframeptr.
 * Allocates a register from *pretregs (any of allregs if *pretregs
 * names none) and emits an ESCAPE|ESCframeptr pseudo instruction
 * with that register number in its Irm field.
 */
code *cdframeptr(elem *e, regm_t *pretregs)
{
    // Limit the request to general registers, falling back to all of them
    regm_t wanted = *pretregs & allregs;
    if (wanted == 0)
        wanted = allregs;

    unsigned target;
    code *result = allocreg(&wanted, &target, TYint);

    code escape;
    escape.Iop = ESCAPE | ESCframeptr;
    escape.Iflags = 0;
    escape.Irex = 0;
    escape.Irm = target;
    result = gen(result,&escape);

    code *fixup = fixresult(e,wanted,pretregs);
    return cat(result,fixup);
}
/***************************************
 * Gen code for load of _GLOBAL_OFFSET_TABLE_.
 * This value gets cached in the local variable 'localgot'.
 * OSX:    CALL L1 / L1: POP reg
 * ELF:    CALL L1 / L1: POP reg / ADD reg,_GLOBAL_OFFSET_TABLE_+n
 * Any other target fails an assert.
 */
code *cdgot(elem *e, regm_t *pretregs)
{
#if TARGET_OSX
    unsigned gotreg;
    regm_t wanted = *pretregs & allregs;
    if (wanted == 0)
        wanted = allregs;

    code *seq = allocreg(&wanted, &gotreg, TYnptr);
    seq = genc(seq,CALL,0,0,0,FLgot,0);         //     CALL L1
    gen1(seq, 0x58 + gotreg);                   // L1: POP reg

    return cat(seq,fixresult(e,wanted,pretregs));
#elif TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
    unsigned gotreg;
    regm_t wanted = *pretregs & allregs;
    if (wanted == 0)
        wanted = allregs;

    code *seq = allocreg(&wanted, &gotreg, TYnptr);
    seq = genc2(seq,CALL,0,0);                  //     CALL L1
    gen1(seq, 0x58 + gotreg);                   // L1: POP reg

    // ADD reg,_GLOBAL_OFFSET_TABLE_+3
    symbol *gotsym = elfobj_getGOTsym();
    code *addgot = gencs(CNIL,0x81,modregrm(3,0,gotreg),FLextern,gotsym);

    /* The offset from L1: is hardcoded (2 when the register is AX,
     * 3 otherwise), so nothing may be inserted into this sequence.
     * CFvolatile keeps the scheduler from rearranging it.
     */
    addgot->Iflags = CFoff | CFvolatile;
    if (gotreg == AX)
        addgot->IEVoffset2 = 2;
    else
        addgot->IEVoffset2 = 3;

    makeitextern(gotsym);
    return cat3(seq,addgot,fixresult(e,wanted,pretregs));
#else
    assert(0);
    return NULL;
#endif
}
/**************************************************
 * Load contents of localgot into EBX.
 * Code is only generated for 32 bit position independent code on
 * Linux, FreeBSD, OpenBSD and Solaris; NULL is returned otherwise.
 * If there is no localgot symbol, an OPgot elem is evaluated into
 * EBX instead.
 */
code *load_localgot()
{
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
    if ((config.flags3 & CFG3pic) && I32)
    {
        elem *src;
        if (localgot)
        {
            // because this hack doesn't work with reg allocator
            localgot->Sflags &= ~GTregcand;
            src = el_var(localgot);
        }
        else
        {
            src = el_long(TYnptr, 0);
            src->Eoper = OPgot;
        }

        // Evaluate the temporary elem into EBX, then release it
        regm_t target = mBX;
        code *result = codelem(src,&target,FALSE);
        el_free(src);
        return result;
    }
#endif
    return NULL;
}
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
/*****************************
 * Store name at p in length-prefixed form.
 * Names of up to 255 characters:
 *      1 length byte, followed by the characters
 * Longer names:
 *      0xFF, 0, the length as a 16 bit value in host byte order,
 *      followed by the characters
 * No terminating 0 is stored.
 * Input:
 *      p       destination, must have room for strlen(name) + ONS_OHD bytes
 *      name    0 terminated name to store
 * Returns:
 *      # of bytes stored
 */
#define ONS_OHD 4               // max # of extra bytes added by obj_namestring()
STATIC int obj_namestring(char *p,const char *name)
{
    unsigned len = strlen(name);

    if (len > 255)
    {
        // Copy the 16 bit length with memcpy() rather than storing through
        // a (short *) cast of p: p need not be suitably aligned for a
        // short, and the cast store violates the aliasing rules. The
        // bytes written are the same.
        unsigned short len16 = (unsigned short)len;

        p[0] = 0xFF;
        p[1] = 0;
        memcpy(p + 2,&len16,sizeof(len16));
        memcpy(p + 4,name,len);
        len += ONS_OHD;
    }
    else
    {
        p[0] = len;
        memcpy(p + 1,name,len);
        len++;
    }
    return len;
}
#endif
/**************************
 * Append a two byte instruction: opcode op followed by a mod=3
 * ModR/M byte built from dstreg and srcreg by modregxrmx().
 */
code *genregs(code *c,unsigned op,unsigned dstreg,unsigned srcreg)
{
    const unsigned modrm = modregxrmx(3,dstreg,srcreg);
    return gen2(c,op,modrm);
}
/**************************
 * Append TEST t,t and flag the instruction with CFpsw.
 */
code *gentstreg(code *c,unsigned t)
{
    code *result = gen2(c,0x85,modregxrmx(3,t,t));      // TEST t,t
    code_orflag(result,CFpsw);
    return result;
}
/**************************
 * Append PUSH reg.
 * The low 3 bits of reg select the 0x50+r opcode; bit 3 set adds REX_B.
 */
code *genpush(code *c, unsigned reg)
{
    const unsigned lowbits = reg & 7;
    const bool needsRex = (reg & 8) != 0;

    c = gen1(c, 0x50 + lowbits);
    if (needsRex)
        code_orrex(c, REX_B);
    return c;
}
/**************************
 * Append POP reg.
 * The low 3 bits of reg select the 0x58+r opcode; bit 3 set adds REX_B.
 */
code *genpop(code *c, unsigned reg)
{
    const unsigned lowbits = reg & 7;
    const bool needsRex = (reg & 8) != 0;

    c = gen1(c, 0x58 + lowbits);
    if (needsRex)
        code_orrex(c, REX_B);
    return c;
}
/**************************
 * Generate a MOV to save a register to a stack slot
 * Input:
 *      reg     register to save; passed by reference, and changed to 0
 *              (the real reg number) when it is ES
 *      slot    offset of the slot, used as an FLcs displacement off BP
 * Returns:
 *      the generated MOV, with REX_W added in 64 bit mode
 */
code *gensavereg(unsigned& reg, targ_uns slot)
{
    unsigned opcode;

    if (reg == ES)
    {
        reg = 0;                // the real reg number
        opcode = 0x8C;          // segment reg mov
    }
    else
        opcode = 0x89;          // normal mov

    // MOV slot[BP],reg
    code *mov = genc1(NULL,opcode,modregxrm(2, reg, BPRM),FLcs,slot);
    if (I64)
        code_orrex(mov, REX_W);
    return mov;
}
/**************************
 * Generate a MOV to,from register instruction.
 * Nothing is generated when both registers are the same. Moves to
 * and from ES use the segment register forms of MOV.
 * Both register numbers must be <= ES.
 */
code *genmovreg(code *c,unsigned to,unsigned from)
{
#if DEBUG
    if (to > ES || from > ES)
        printf("genmovreg(c = %p, to = %d, from = %d)\n",c,to,from);
#endif
    assert(to <= ES && from <= ES);

    // Redundant move, dump it
    if (to == from)
        return c;

    if (to == ES)
        c = genregs(c,0x8E,0,from);     // MOV ES,from
    else if (from == ES)
        c = genregs(c,0x8C,0,to);       // MOV to,ES
    else
        c = genregs(c,0x89,from,to);    // MOV to,from

    if (I64)
        code_orrex(c, REX_W);
    return c;
}
/***************************************
 * Generate immediate multiply instruction for r1=r2*imm.
 * Multiplying by 1 becomes a register move and multiplying by 5 an
 * LEA; everything else uses IMUL r1,r2,imm.
 */
code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm)
{
    // These optimizations should probably be put into pinholeopt()
    if (imm == 1)
        return genmovreg(c,r1,r2);

    if (imm == 5)
    {
        // LEA r1,[r2 + r2*4]
        code lea;
        lea.Iop = LEA;
        lea.Iflags = 0;
        lea.Irex = 0;
        buildEA(&lea,r2,r2,4,0);
        lea.orReg(r1);
        return gen(c,&lea);
    }

    return genc2(c,0x69,modregxrmx(3,r1,r2),imm);      // IMUL r1,r2,imm
}
/******************************
 * Load CX with the value of _AHSHIFT.
 * Only implemented for SCPP && TX86 builds; every other
 * configuration fails an assert.
 */
code *genshift(code *c)
{
#if SCPP && TX86
    // Set up ahshift to trick ourselves into giving the right fixup,
    // which must be seg-relative, external frame, external target.
    code *load = gencs(CNIL,0xC7,modregrm(3,0,CX),FLfunc,rtlsym[RTLSYM_AHSHIFT]);
    load->Iflags |= CFoff;
    return cat(c,load);
#else
    assert(0);
    return 0;
#endif
}
/******************************
 * Move constant value into reg.
 * Take advantage of existing values in registers, as tracked in
 * regcon.immed (mval = mask of registers holding a known constant,
 * value[] = those constants).  The tracking is updated to reflect the
 * new contents of reg.
 *      If flags & mPSW
 *              set flags based on result
 *      Else if flags & 8
 *              do not disturb flags
 *      Else
 *              don't care about flags
 *      If flags & 1 then byte move
 *      If flags & 2 then short move (for I32 and I64)
 *      If flags & 4 then don't disturb unused portion of register
 *      If flags & 16 then reg is a byte register AL..BH
 *      If flags & 64 (0x40) then 64 bit move (I64 only)
 * Params:
 *      c     = code list to append to (may be CNIL)
 *      reg   = destination register
 *      value = constant to load
 *      flags = see above
 * Returns:
 *      code (if any) generated, appended to c
 */
code *movregconst(code *c,unsigned reg,targ_size_t value,regm_t flags)
{   unsigned r;
    regm_t mreg;

    //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask[reg]), value, value, flags);
#define genclrreg(a,r) genregs(a,0x31,r,r)

    // regm is non-zero when reg currently holds a known constant (regv)
    regm_t regm = regcon.immed.mval & mask[reg];
    targ_size_t regv = regcon.immed.value[reg];

    if (flags & 1)      // 8 bits
    {
        value &= 0xFF;
        regm &= BYTEREGS;

        // If we already have the right value in the right register
        if (regm && (regv & 0xFF) == value)
            goto L2;

        // For an H register, look at bits 8..15 of the enclosing register.
        // Note that regv is switched to the enclosing register's value here.
        if (flags & 16 && reg & 4 &&    // if an H byte register
            regcon.immed.mval & mask[reg & 3] &&
            (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
            goto L2;

        /* Avoid byte register loads on Pentium Pro and Pentium II
         * to avoid dependency stalls.
         */
        if (config.flags4 & CFG4speed &&
            config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
            goto L3;

        // See if another register has the right value
        r = 0;
        for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
        {
            if (mreg & 1)
            {
                if ((regcon.immed.value[r] & 0xFF) == value)
                {   c = genregs(c,0x8A,reg,r);          // MOV regL,rL
                    // The REX prefix only exists in 64 bit mode, so the
                    // I64 test has to guard both register checks.
                    if (I64 && (reg >= 4 || r >= 4))
                        code_orrex(c, REX);
                    goto L2;
                }
                if (!(I64 && reg >= 4) &&
                    r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
                {   c = genregs(c,0x8A,reg,r | 4);      // MOV regL,rH
                    goto L2;
                }
            }
            r++;
        }

        if (value == 0 && !(flags & 8))
        {
            if (!(flags & 4) &&                 // if we can set the whole register
                !(flags & 16 && reg & 4))       // and reg is not an H register
            {   c = genregs(c,0x31,reg,reg);    // XOR reg,reg
                regimmed_set(reg,value);
                regv = 0;
            }
            else
                c = genregs(c,0x30,reg,reg);    // XOR regL,regL
            flags &= ~mPSW;                     // flags already set by XOR
        }
        else
        {   c = genc2(c,0xC6,modregrmx(3,0,reg),value);  /* MOV regL,value */
            if (reg >= 4 && I64)
            {
                code_orrex(c, REX);
            }
        }
    L2:
        // c can still be CNIL here (value was already in the register),
        // in which case the new instruction is only reachable through the
        // return value of genregs(), so it must be assigned back to c.
        if (flags & mPSW)
            c = genregs(c,0x84,reg,reg);        // TEST regL,regL

        if (regm)
            // Set just the 'L' part of the register value
            regimmed_set(reg,(regv & ~(targ_size_t)0xFF) | value);
        else if (flags & 16 && reg & 4 && regcon.immed.mval & mask[reg & 3])
            // Set just the 'H' part of the register value
            regimmed_set((reg & 3),(regv & ~(targ_size_t)0xFF00) | (value << 8));
        return c;
    }
L3:
    if (I16)
        value = (targ_short) value;             /* sign-extend MSW */
    else if (I32)
        value = (targ_int) value;

    if (!I16 && flags & 2)                      // load 16 bit value
    {
        value &= 0xFFFF;
        if (value == 0)
            goto L1;
        else
        {
            if (flags & mPSW)
                goto L1;
            code *c1 = genc2(CNIL,0xC7,modregrmx(3,0,reg),value); // MOV reg,value
            c1->Iflags |= CFopsize;             // yes, even for I64
            c = cat(c,c1);
            if (regm)
                // High bits of register are not affected by 16 bit load
                regimmed_set(reg,(regv & ~(targ_size_t)0xFFFF) | value);
        }
        return c;
    }
L1:
    /* If we already have the right value in the right register */
    if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
    {   if (flags & mPSW)
            c = gentstreg(c,reg);
    }
    else if (flags & 64 && regm && regv == value)
    {   // Look at the full 64 bits
        if (flags & mPSW)
        {
            c = gentstreg(c,reg);
            code_orrex(c, REX_W);
        }
    }
    else
    {
        if (flags & mPSW)
        {
            // The condition codes must reflect the value, so pick a
            // sequence whose last instruction sets them.
            switch (value)
            {   case 0:
                    c = genclrreg(c,reg);
                    if (flags & 64)
                        code_orrex(c, REX_W);
                    break;
                case 1:
                    if (I64)
                        goto L4;
                    c = genclrreg(c,reg);
                    goto inc;
                case -1:
                    if (I64)
                        goto L4;
                    c = genclrreg(c,reg);
                    goto dec;
                default:
                L4:
                    if (flags & 64)
                    {
                        c = genc2(c,0xC7,(REX_W << 16) | modregrmx(3,0,reg),value); // MOV reg,value64
                        gentstreg(c,reg);
                        code_orrex(c, REX_W);
                    }
                    else
                    {   c = genc2(c,0xC7,modregrmx(3,0,reg),value); /* MOV reg,value */
                        gentstreg(c,reg);
                    }
                    break;
            }
        }
        else
        {
            /* Look for single byte conversion */
            if (regcon.immed.mval & mAX)
            {
                if (I32)
                {   if (reg == AX && value == (targ_short) regv)
                    {   c = gen1(c,0x98);               /* CWDE */
                        goto done;
                    }
                    if (reg == DX &&
                        value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {   c = gen1(c,0x99);               /* CDQ */
                        goto done;
                    }
                }
                else if (I16)
                {
                    if (reg == AX &&
                        (targ_short) value == (signed char) regv)
                    {   c = gen1(c,0x98);               /* CBW */
                        goto done;
                    }

                    if (reg == DX &&
                        (targ_short) value == (regcon.immed.value[AX] & 0x8000 ? (targ_short) 0xFFFF : (targ_short) 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {   c = gen1(c,0x99);               /* CWD */
                        goto done;
                    }
                }
            }
            if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
            {   c = genclrreg(c,reg);                   // CLR reg
                if (flags & 64)
                    code_orrex(c, REX_W);
                goto done;
            }

            // One-byte INC/DEC forms (0x40+reg / 0x48+reg) are not used for I64
            if (!I64 && regm && !(flags & 8))
            {   if (regv + 1 == value ||
                    /* Catch case of (0xFFFF+1 == 0) for 16 bit compiles */
                    (I16 && (targ_short)(regv + 1) == (targ_short)value))
                {
                inc:
                    c = gen1(c,0x40 + reg);             /* INC reg */
                    goto done;
                }
                if (regv - 1 == value)
                {
                dec:
                    c = gen1(c,0x48 + reg);             /* DEC reg */
                    goto done;
                }
            }

            /* See if another register has the right value */
            r = 0;
            for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
            {
#ifdef DEBUG
                assert(!I16 || regcon.immed.value[r] == (targ_short)regcon.immed.value[r]);
#endif
                if (mreg & 1 && regcon.immed.value[r] == value)
                {   c = genmovreg(c,reg,r);
                    if (flags & 64)
                        code_orrex(c, REX_W);
                    goto done;
                }
                r++;
            }

            if (value == 0 && !(flags & 8))
            {   c = genclrreg(c,reg);                   // CLR reg
                if (flags & 64)
                    code_orrex(c, REX_W);
            }
            else
            {   /* See if we can just load a byte */
                if (regm & BYTEREGS &&
                    !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
                   )
                {
                    if ((regv & ~(targ_size_t)0xFF) == (value & ~(targ_size_t)0xFF))
                    {   c = movregconst(c,reg,value,(flags & 8) |4|1);  // load regL
                        return c;
                    }
                    if (regm & (mAX|mBX|mCX|mDX) &&
                        (regv & ~(targ_size_t)0xFF00) == (value & ~(targ_size_t)0xFF00) &&
                        !I64)
                    {   c = movregconst(c,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
                        return c;
                    }
                }
                if (flags & 64)
                    c = genc2(c,0xC7,(REX_W << 16) | modregrmx(3,0,reg),value); // MOV reg,value64
                else
                    c = genc2(c,0xC7,modregrmx(3,0,reg),value); // MOV reg,value
            }
        }
    done:
        // Record that reg now holds value
        regimmed_set(reg,value);
    }
    return c;
}
/**************************
 * Append a jump instruction to the code list `c`.
 * Params:
 *      c      = code list to append to
 *      op     = low byte is the jump opcode; the second byte selects an
 *               optional companion jump (JP, JNP, 1 or 0) used for
 *               floating point comparisons
 *      fltarg = flavour of the target, FLblock or FLcode
 *      targ   = target block (or a code* cast to block* when FLcode)
 * Returns:
 *      the code list with the jump(s) appended
 */
code *genjmp(code *c,unsigned op,unsigned fltarg,block *targ)
{
    code jmp;

    jmp.Iop = op & 0xFF;
    jmp.Iflags = 0;
    jmp.Irex = 0;
    if (!(op == JMP || op == 0xE8))             // if not already long branch
        jmp.Iflags = CFjmp16;                   /* assume long branch for op = 0x7x */
    jmp.IFL2 = fltarg;                          /* FLblock (or FLcode) */
    jmp.IEV2.Vblock = targ;                     /* target block (or code) */
    if (fltarg == FLcode)
        ((code *)targ)->Iflags |= CFtarg;       // mark the target instruction

    // With fast floating point the companion jump is never generated
    if (config.flags4 & CFG4fastfloat)
        return gen(c,&jmp);

    code *cjump = gen(CNIL,&jmp);
    const unsigned second = op & 0xFF00;        /* look at second jump opcode */

    /* The JP and JNP come from floating point comparisons */
    if (second == (JP << 8))
    {
        // Follow the jump with a JP to the same target
        jmp.Iop = JP;
        gen(cjump,&jmp);
    }
    else if (second == (JNP << 8))
    {
        /* Do a JP around the jump instruction */
        code *cskip = gennop(CNIL);
        c = genjmp(c,JP,FLcode,(block *) cskip);
        cat(cjump,cskip);
    }
    else if (second != (1 << 8) &&              /* toggled no jump */
             second != (0 << 8))
    {
#ifdef DEBUG
        printf("jop = x%x\n",op);
#endif
        assert(0);
    }
    return cat(c,cjump);
}
/*******************************
* Generate code for a function start.
*
* Lays out the stack frame of the current function (funcsym_p), decides
* whether a BP based frame is needed, and builds the instruction list that
* sets up the frame, allocates locals, saves callee-saved registers, and
* moves incoming register parameters to where the function body expects
* them.
*
* Input:
* Coffset address of start of code
* (NOTE(review): Coffset is not referenced inside this function body;
* confirm whether the caller maintains it.)
* Output:
* Coffset adjusted for size of code generated
* EBPtoESP
* hasframe
* BPoff
* Also assigns Poff, Aoff, regsave.off, Foff, AAoff, CSoff, NDPoff, Toff
* and localsize; may set needframe, raise Aalign, round up Poffset (64 bit
* variadic functions only) and clears regcon.immed.mval.
* Returns:
* the prolog code list, or NULL for a naked function (mTYnaked)
*/
code *prolog()
{
SYMIDX si;
unsigned reg;
char enter;
unsigned Foffset;
unsigned xlocalsize; // amount to subtract from ESP to make room for locals
unsigned pushallocreg;
char guessneedframe;
regm_t namedargs = 0;
//printf("cod3.prolog(), needframe = %d, Aalign = %d\n", needframe, Aalign);
debugx(debugw && printf("funcstart()\n"));
regcon.immed.mval = 0; /* no values in registers yet */
EBPtoESP = -REGSIZE;
hasframe = 0;
char pushds = 0;
BPoff = 0;
code *c = CNIL;
int pushalloc = 0;
tym_t tyf = funcsym_p->ty();
tym_t tym = tybasic(tyf);
unsigned farfunc = tyfarfunc(tym);
// Register pushed when a single PUSH is used to allocate locals
// (see the pushalloc paths below)
pushallocreg = (tyf == TYmfunc) ? CX : AX;
if (config.flags & CFGalwaysframe || funcsym_p->Sfunc->Fflags3 & Ffakeeh)
needframe = 1;
// The frame layout depends on needframe. If the decision made further
// down differs from the value assumed here, control returns to this label
// and the layout is recomputed.
Lagain:
guessneedframe = needframe;
// if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & ~NTEHjmonitor))
// usednteh |= NTEHpassthru;
/* Compute BP offsets for variables on stack.
* The organization is:
* Poff parameters
* seg of return addr (if far function)
* IP of return addr
* BP-> caller's BP
* DS (if Windows prolog/epilog)
* exception handling context symbol
* Aoff autos and regs
* regsave.off any saved registers
* Foff floating register
* AAoff alloca temporary
* CSoff common subs
* NDPoff any 8087 saved registers
* Toff temporaries
* monitor context record
* any saved registers
*/
// Poff depends on function type and target mode
if (tym == TYifunc)
Poff = 26;
else if (I64)
Poff = 16;
else if (I32)
Poff = farfunc ? 12 : 8;
else
Poff = farfunc ? 6 : 4;
Aoff = 0;
#if NTEXCEPTIONS == 2
Aoff -= nteh_contextsym_size();
#if MARS
if (funcsym_p->Sfunc->Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
Aoff -= 5 * 4;
#endif
#endif
// Each region is placed below the previous one, so the offsets grow
// more negative going down the list
Aoff = -align(0,-Aoff + Aoffset);
regsave.off = Aoff - align(0,regsave.top);
Foffset = floatreg ? (config.fpxmmregs ? 16 : DOUBLESIZE) : 0;
Foff = regsave.off - align(0,Foffset);
assert(usedalloca != 1);
AAoff = usedalloca ? (Foff - REGSIZE) : Foff;
CSoff = AAoff - align(0,cstop * REGSIZE);
NDPoff = CSoff - align(0,NDP::savetop * NDPSAVESIZE);
Toff = NDPoff - align(0,Toffset);
if (Foffset > Aalign)
Aalign = Foffset;
if (Aalign > REGSIZE)
{
// Adjust Aoff so that it is Aalign byte aligned, assuming that
// before function parameters were pushed the stack was
// Aalign byte aligned
targ_size_t psize = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
int sz = psize + -Aoff + Poff + (needframe ? 0 : REGSIZE);
if (sz & (Aalign - 1))
{ int adj = Aalign - (sz & (Aalign - 1));
// Shift every region by the same amount so their relative layout is kept
Aoff -= adj;
regsave.off -= adj;
Foff -= adj;
AAoff -= adj;
CSoff -= adj;
NDPoff -= adj;
Toff -= adj;
}
}
localsize = -Toff;
regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving
int npush = 0; // number of registers that need saving
for (regm_t x = topush; x; x >>= 1)
npush += x & 1;
// Keep the stack aligned by 8 for any subsequent function calls
if (!I16 && calledafunc &&
(STACKALIGN == 16 || config.flags4 & CFG4stackalign))
{
//printf("npush = %d Poff = x%x needframe = %d localsize = x%x\n", npush, Poff, needframe, localsize);
int sz = Poff + (needframe ? 0 : -REGSIZE) + localsize + npush * REGSIZE;
if (STACKALIGN == 16)
{
if (sz & (8|4))
localsize += STACKALIGN - (sz & (8|4));
}
else if (sz & 4)
localsize += 4;
}
//printf("Foff x%02x Aoff x%02x Toff x%02x NDPoff x%02x CSoff x%02x Poff x%02x localsize x%02x\n",
//(int)Foff,(int)Aoff,(int)Toff,(int)NDPoff,(int)CSoff,(int)Poff,(int)localsize);
xlocalsize = localsize;
if (tyf & mTYnaked) // if no prolog/epilog for function
{
hasframe = 1;
return NULL;
}
if (tym == TYifunc)
{ static unsigned char ops2[] = { 0x60,0x1E,0x06,0 };
static unsigned char ops0[] = { 0x50,0x51,0x52,0x53,
0x54,0x55,0x56,0x57,
0x1E,0x06,0 };
unsigned char *p;
// Emit the zero-terminated opcode table one byte at a time; ops2 is
// used for 80286 and later targets, ops0 otherwise
p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0;
do
c = gen1(c,*p);
while (*++p);
c = genregs(c,0x8B,BP,SP); // MOV BP,SP
if (localsize)
c = genc2(c,0x81,modregrm(3,5,SP),localsize); // SUB SP,localsize
tyf |= mTYloadds;
hasframe = 1;
goto Lcont;
}
/* Determine if we need BP set up */
if (config.flags & CFGalwaysframe)
needframe = 1;
else
{
if (localsize)
{
if (I16 ||
!(config.flags4 & CFG4speed) ||
config.target_cpu < TARGET_Pentium ||
farfunc ||
config.flags & CFGstack ||
xlocalsize >= 0x1000 ||
(usednteh & ~NTEHjmonitor) ||
anyiasm ||
usedalloca
)
needframe = 1;
}
if (refparam && (anyiasm || I16))
needframe = 1;
}
if (needframe)
{ assert(mfuncreg & mBP); // shouldn't have used mBP
if (!guessneedframe) // if guessed wrong
goto Lagain;
}
if (I16 && config.wflags & WFwindows && farfunc)
{ int wflags;
int segreg;
#if SCPP
// alloca() can't be because the 'special' parameter won't be at
// a known offset from BP.
if (usedalloca == 1)
synerr(EM_alloca_win); // alloca() can't be in Windows functions
#endif
wflags = config.wflags;
if (wflags & WFreduced && !(tyf & mTYexport))
{ // reduced prolog/epilog for non-exported functions
wflags &= ~(WFdgroup | WFds | WFss);
}
c = getregs(mAX);
assert(!c); /* should not have any value in AX */
switch (wflags & (WFdgroup | WFds | WFss))
{ case WFdgroup: // MOV AX,DGROUP
if (wflags & WFreduced)
tyf &= ~mTYloadds; // remove redundancy
c = genc(c,0xC7,modregrm(3,0,AX),0,0,FLdatseg,(targ_uns) 0);
c->Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg
break;
case WFss:
segreg = 2; // SS
goto Lmovax;
case WFds:
segreg = 3; // DS
Lmovax:
c = gen2(c,0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg
if (wflags & WFds)
gen1(c,0x90); // NOP
break;
case 0:
break;
default:
#ifdef DEBUG
printf("config.wflags = x%x\n",config.wflags);
#endif
assert(0);
}
if (wflags & WFincbp)
c = gen1(c,0x40 + BP); // INC BP
c = gen1(c,0x50 + BP); // PUSH BP
genregs(c,0x8B,BP,SP); // MOV BP,SP
if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
{ gen1(c,0x1E); // PUSH DS
pushds = TRUE;
BPoff = -REGSIZE;
}
if (wflags & (WFds | WFss | WFdgroup))
gen2(c,0x8E,modregrm(3,3,AX)); // MOV DS,AX
enter = FALSE; /* don't use ENTER instruction */
hasframe = 1; /* we have a stack frame */
}
else
if (needframe) // if variables or parameters
{
if (config.wflags & WFincbp && farfunc)
c = gen1(c,0x40 + BP); /* INC BP */
// Use an explicit PUSH BP / MOV BP,SP when any of these conditions
// holds; otherwise the frame is set up with ENTER further down
if (config.target_cpu < TARGET_80286 ||
config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_OSX64 | EX_FREEBSD | EX_FREEBSD64 | EX_SOLARIS | EX_SOLARIS64) ||
!localsize ||
config.flags & CFGstack ||
(xlocalsize >= 0x1000 && config.exe & EX_flat) ||
localsize >= 0x10000 ||
#if NTEXCEPTIONS == 2
(usednteh & ~NTEHjmonitor && (config.flags2 & CFG2seh)) ||
#endif
(config.target_cpu >= TARGET_80386 &&
config.flags4 & CFG4speed)
)
{
c = gen1(c,0x50 + BP); // PUSH BP
genregs(c,0x8B,BP,SP); // MOV BP,SP
if (I64)
code_orrex(c, REX_W); // MOV RBP,RSP
#if ELFOBJ || MACHOBJ
if (config.fulltypes)
// Don't reorder instructions, as dwarf CFA relies on it
code_orflag(c, CFvolatile);
#endif
enter = FALSE; /* do not use ENTER instruction */
#if NTEXCEPTIONS == 2
if (usednteh & ~NTEHjmonitor && (config.flags2 & CFG2seh))
{
code *ce = nteh_prolog();
c = cat(c,ce);
int sz = nteh_contextsym_size();
assert(sz != 0); // should be 5*4, not 0
xlocalsize -= sz; // sz is already subtracted from ESP
// by nteh_prolog()
}
#endif
#if ELFOBJ || MACHOBJ
if (config.fulltypes)
{ int off = I64 ? 16 : 8;
dwarf_CFA_set_loc(1); // address after PUSH EBP
dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP]
dwarf_CFA_set_loc(3); // address after MOV EBP,ESP
// Yes, I know the parameter is 8 when we mean 0!
// But this gets the cfa register set to EBP correctly
dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP]
}
#endif
}
else
enter = TRUE;
hasframe = 1;
}
// Note: the goto below jumps into the middle of the nested ifs that
// follow, so a stack check is generated even when needframe or
// xlocalsize would not otherwise lead there
if (config.flags & CFGstack) /* if stack overflow check */
goto Ladjstack;
if (needframe) /* if variables or parameters */
{
if (xlocalsize) /* if any stack offset */
{
Ladjstack:
#if !TARGET_LINUX // seems that Linux doesn't need to fault in stack pages
if ((config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check
#if TARGET_WINDOS
|| (xlocalsize >= 0x1000 && config.exe & EX_flat)
#endif
)
{
if (I16)
{
// BUG: Won't work if parameter is passed in AX
c = movregconst(c,AX,xlocalsize,FALSE); // MOV AX,localsize
makeitextern(rtlsym[RTLSYM_CHKSTK]);
// CALL _chkstk
gencs(c,(LARGECODE) ? 0x9A : CALL,0,FLfunc,rtlsym[RTLSYM_CHKSTK]);
useregs((ALLREGS | mBP | mES) & ~rtlsym[RTLSYM_CHKSTK]->Sregsaved);
}
else
{
/* Watch out for 64 bit code where EDX is passed as a register parameter
*/
int reg = I64 ? R11 : DX; // scratch register
/* MOV EDX, xlocalsize/0x1000
* L1: SUB ESP, 0x1000
* TEST [ESP],ESP
* DEC EDX
* JNE L1
* SUB ESP, xlocalsize % 0x1000
*/
c = movregconst(c, reg, xlocalsize / 0x1000, FALSE);
code *csub = genc2(NULL,0x81,modregrm(3,5,SP),0x1000);
if (I64)
code_orrex(csub, REX_W);
code_orflag(csub, CFtarg2);
gen2sib(csub, 0x85, modregrm(0,SP,4),modregrm(0,4,SP));
// The JNE displacements below are hard-coded (-14 for the 64 bit
// sequence, -12 otherwise), so they depend on the exact encoded
// size of the loop body
if (I64)
{ gen2(csub, 0xFF, (REX_W << 16) | modregrmx(3,0,R11)); // DEC R11
genc2(csub,JNE,0,(targ_uns)-14);
}
else
{ gen1(csub, 0x48 + DX); // DEC EDX
genc2(csub,JNE,0,(targ_uns)-12);
}
regimmed_set(reg,0); // reg is now 0
genc2(csub,0x81,modregrm(3,5,SP),xlocalsize & 0xFFF);
if (I64)
code_orrex(csub, REX_W);
c = cat(c,csub);
useregs(mask[reg]);
}
}
else
#endif
{
// Plain allocation of the locals: ENTER, a single PUSH, or SUB SP
if (enter)
{ // ENTER xlocalsize,0
c = genc(c,0xC8,0,FLconst,xlocalsize,FLconst,(targ_uns) 0);
#if ELFOBJ || MACHOBJ
assert(!config.fulltypes); // didn't emit Dwarf data
#endif
}
else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized)
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
// Do this to prevent an -x[EBP] to be moved in
// front of the push.
code_orflag(c,CFvolatile);
pushalloc = 1;
}
else
{ // SUB SP,xlocalsize
c = genc2(c,0x81,modregrm(3,5,SP),xlocalsize);
if (I64)
code_orrex(c, REX_W);
}
}
if (usedalloca)
{
// Set up magic parameter for alloca()
// MOV -REGSIZE[BP],localsize - BPoff
//c = genc(c,0xC7,modregrm(2,0,BPRM),FLconst,-REGSIZE,FLconst,localsize - BPoff);
c = genc(c,0xC7,modregrm(2,0,BPRM),
FLconst,AAoff + BPoff,
FLconst,localsize - BPoff);
if (I64)
code_orrex(c, REX_W);
}
}
else
assert(usedalloca == 0);
}
else if (xlocalsize)
{
// No frame pointer: locals are allocated relative to SP only
assert(I32);
if (xlocalsize == REGSIZE)
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
pushalloc = 1;
}
else if (xlocalsize == 2 * REGSIZE)
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
gen1(c,0x50 + pushallocreg); // PUSH AX
pushalloc = 1;
}
else
{ // SUB ESP,xlocalsize
c = genc2(c,0x81,modregrm(3,5,SP),xlocalsize);
if (I64)
code_orrex(c, REX_W);
}
BPoff += REGSIZE;
}
else
assert((localsize | usedalloca) == 0 || (usednteh & NTEHjmonitor));
EBPtoESP += xlocalsize;
/* The idea is to generate trace for all functions if -Nc is not thrown.
* If -Nc is thrown, generate trace only for global COMDATs, because those
* are relevant to the FUNCTIONS statement in the linker .DEF file.
* This same logic should be in epilog().
*/
if (config.flags & CFGtrace &&
(!(config.flags4 & CFG4allcomdat) ||
funcsym_p->Sclass == SCcomdat ||
funcsym_p->Sclass == SCglobal ||
(config.flags2 & CFG2comdat && SymInline(funcsym_p))
)
)
{
if (STACKALIGN == 16 && npush)
{ /* This could be avoided by moving the function call to after the
* registers are saved. But I don't remember why the call is here
* and not there.
*/
c = genc2(c,0x81,modregrm(3,5,SP),npush * REGSIZE); // SUB ESP,npush * REGSIZE
if (I64)
code_orrex(c, REX_W);
}
symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N];
makeitextern(s);
c = gencs(c,I16 ? 0x9A : CALL,0,FLfunc,s); // CALL _trace
if (!I16)
code_orflag(c,CFoff | CFselfrel);
/* Embedding the function name inline after the call works, but it
* makes disassembling the code annoying.
*/
#if ELFOBJ || MACHOBJ
// Length-prefixed name: one length byte for names of up to 254
// characters, otherwise the bytes 0xFF,0 followed by a 16 bit
// little-endian length
size_t len = strlen(funcsym_p->Sident);
char *buffer = (char *)malloc(len + 4);
assert(buffer);
if (len <= 254)
{ buffer[0] = len;
memcpy(buffer + 1, funcsym_p->Sident, len);
len++;
}
else
{ buffer[0] = 0xFF;
buffer[1] = 0;
buffer[2] = len & 0xFF;
buffer[3] = len >> 8;
memcpy(buffer + 4, funcsym_p->Sident, len);
len += 4;
}
genasm(c, buffer, len); // append func name
free(buffer);
#else
char name[IDMAX+IDOHD+1];
size_t len = obj_mangle(funcsym_p,name);
assert(len < sizeof(name));
genasm(c,name,len); // append func name
#endif
if (STACKALIGN == 16 && npush)
{
c = genc2(c,0x81,modregrm(3,0,SP),npush * REGSIZE); // ADD ESP,npush * REGSIZE
if (I64)
code_orrex(c, REX_W);
}
useregs((ALLREGS | mBP | mES) & ~s->Sregsaved);
}
#if MARS
if (usednteh & NTEHjmonitor)
{ Symbol *sthis;
// Locate the symbol named "this"; the assert fires if there is none
for (si = 0; 1; si++)
{ assert(si < globsym.top);
sthis = globsym.tab[si];
if (strcmp(sthis->Sident,"this") == 0)
break;
}
c = cat(c,nteh_monitor_prolog(sthis));
EBPtoESP += 3 * 4;
}
#endif
// Save the registers in topush, one PUSH each, tracking the growing
// distance between BP and SP in EBPtoESP
while (topush) /* while registers to push */
{ reg = findreg(topush);
topush &= ~mask[reg];
c = gen1(c,0x50 + (reg & 7));
if (reg & 8)
code_orrex(c, REX_B);
EBPtoESP += REGSIZE;
#if ELFOBJ || MACHOBJ
if (config.fulltypes)
{ // Emit debug_frame data giving location of saved register
// relative to 0[EBP]
pinholeopt(c, NULL);
dwarf_CFA_set_loc(calcblksize(c)); // address after PUSH reg
dwarf_CFA_offset(reg, -EBPtoESP - REGSIZE);
}
#endif
}
Lcont:
/* Determine if we need to reload DS */
if (tyf & mTYloadds)
{ code *c1;
if (!pushds) // if not already pushed
c = gen1(c,0x1E); // PUSH DS
c1 = genc(CNIL,0xC7,modregrm(3,0,AX),0,0,FLdatseg,(targ_uns) 0); /* MOV AX,DGROUP */
c1->Iflags ^= CFseg | CFoff; /* turn off CFoff, on CFseg */
c = cat(c,c1);
gen2(c,0x8E,modregrm(3,3,AX)); /* MOV DS,AX */
useregs(mAX);
}
if (tym == TYifunc)
c = gen1(c,0xFC); // CLD
#if NTEXCEPTIONS == 2
if (usednteh & NTEH_except)
c = cat(c,nteh_setsp(0x89)); // MOV __context[EBP].esp,ESP
#endif
// Load register parameters off of the stack. Do not use
// assignaddr(), as it will replace the stack reference with
// the register!
for (si = 0; si < globsym.top; si++)
{ symbol *s = globsym.tab[si];
code *c2;
unsigned sz = type_size(s->Stype);
if ((s->Sclass == SCregpar || s->Sclass == SCparameter) &&
s->Sfl == FLreg &&
(refparam
#if MARS
// This variable has been referenced by a nested function
|| s->Stype->Tty & mTYvolatile
#endif
))
{
/* MOV reg,param[BP] */
//assert(refparam);
if (mask[s->Sreglsw] & XMMREGS)
{
unsigned op = xmmload(s->Stype->Tty); // MOVSS/D xreg,mem
unsigned xreg = s->Sreglsw - XMM0;
code *c2 = genc1(CNIL,op,modregxrm(2,xreg,BPRM),FLconst,Poff + s->Soffset);
if (!hasframe)
{ // Convert to ESP relative address rather than EBP
c2->Irm = modregxrm(2,xreg,4);
c2->Isib = modregrm(0,4,SP);
c2->IEVpointer1 += EBPtoESP;
}
c = cat(c,c2);
}
else
{
// 0x8B ^ (sz == 1) selects opcode 0x8A for byte sized parameters
code *c2 = genc1(CNIL,0x8B ^ (sz == 1),
modregxrm(2,s->Sreglsw,BPRM),FLconst,Poff + s->Soffset);
if (!I16 && sz == SHORTSIZE)
c2->Iflags |= CFopsize; // operand size
if (I64 && sz >= REGSIZE)
c2->Irex |= REX_W;
if (!hasframe)
{ /* Convert to ESP relative address rather than EBP */
assert(!I16);
c2->Irm = modregxrm(2,s->Sreglsw,4);
c2->Isib = modregrm(0,4,SP);
c2->IEVpointer1 += EBPtoESP;
}
if (sz > REGSIZE)
{
// Parameter spans two registers: load the upper half into Sregmsw
code *c3 = genc1(CNIL,0x8B,
modregxrm(2,s->Sregmsw,BPRM),FLconst,Poff + s->Soffset + REGSIZE);
if (I64)
c3->Irex |= REX_W;
if (!hasframe)
{ /* Convert to ESP relative address rather than EBP */
assert(!I16);
c3->Irm = modregxrm(2,s->Sregmsw,4);
c3->Isib = modregrm(0,4,SP);
c3->IEVpointer1 += EBPtoESP;
}
c2 = cat(c2,c3);
}
c = cat(c,c2);
}
}
else if (s->Sclass == SCfastpar)
{ // Argument is passed in a register
unsigned preg = s->Spreg;
// Remember which registers carried named arguments; used by the
// varargs save code below
namedargs |= mask[preg];
if (s->Sfl == FLreg)
{ // MOV reg,preg
if (mask[preg] & XMMREGS)
{
unsigned op = xmmload(s->Stype->Tty); // MOVSS/D xreg,preg
unsigned xreg = s->Sreglsw - XMM0;
c = gen2(c,op,modregxrmx(3,xreg,preg - XMM0));
}
else
{
c = genmovreg(c,s->Sreglsw,preg);
if (I64 && sz == 8)
code_orrex(c, REX_W);
}
}
else if (s->Sflags & SFLdead ||
(!anyiasm && !(s->Sflags & SFLread) && s->Sflags & SFLunambig &&
#if MARS
// This variable has been referenced by a nested function
!(s->Stype->Tty & mTYvolatile) &&
#endif
(config.flags4 & CFG4optimized || !config.fulltypes)))
{
// Ignore it, as it is never referenced
;
}
else
{
// The parameter lives in memory: store the incoming register there
targ_size_t offset = Aoff + BPoff + s->Soffset;
int op = 0x89; // MOV x[EBP],preg
if (preg >= XMM0 && preg <= XMM15)
{
op = xmmstore(s->Stype->Tty);
}
if (hasframe)
{
// When the locals were allocated by pushing pushallocreg, and that
// is the parameter register, no store is generated
if (!(pushalloc && preg == pushallocreg))
{
// MOV x[EBP],preg
c2 = genc1(CNIL,op,
modregxrm(2,preg,BPRM),FLconst, offset);
if (preg >= XMM0 && preg <= XMM15)
{
}
else
{
//printf("%s Aoff = %d, BPoff = %d, Soffset = %d, sz = %d\n", s->Sident, (int)Aoff, (int)BPoff, (int)s->Soffset, (int)sz);
// if (offset & 2)
// c2->Iflags |= CFopsize;
if (I64 && sz == 8)
code_orrex(c2, REX_W);
}
c = cat(c, c2);
}
}
else
{
offset += EBPtoESP;
if (!(pushalloc && preg == pushallocreg))
{
// MOV offset[ESP],preg
// BUG: byte size?
c2 = genc1(CNIL,op,
(modregrm(0,4,SP) << 8) |
modregxrm(2,preg,4),FLconst,offset);
if (preg >= XMM0 && preg <= XMM15)
{
}
else
{
if (I64 && sz == 8)
c2->Irex |= REX_W;
// if (offset & 2)
// c2->Iflags |= CFopsize;
}
c = cat(c,c2);
}
}
}
}
}
/* Load arguments passed in registers into the varargs save area
* so they can be accessed by va_arg().
*/
if (I64 && variadic(funcsym_p->Stype))
{
/* Look for __va_argsave
*/
symbol *sv = NULL;
for (SYMIDX si = 0; si < globsym.top; si++)
{ symbol *s = globsym.tab[si];
if (s->Sident[0] == '_' && strcmp(s->Sident, "__va_argsave") == 0)
{ sv = s;
break;
}
}
if (sv && !(sv->Sflags & SFLdead))
{
/* Generate code to move any arguments passed in registers into
* the stack variable __va_argsave,
* so we can reference it via pointers through va_arg().
* struct __va_argsave_t {
* size_t[6] regs;
* real[8] fpregs;
* uint offset_regs;
* uint offset_fpregs;
* void* stack_args;
* void* reg_args;
* }
* The MOVAPS instructions seg fault if data is not aligned on
* 16 bytes, so this gives us a nice check to ensure no mistakes.
MOV voff+0*8[RBP],EDI
MOV voff+1*8[RBP],ESI
MOV voff+2*8[RBP],RDX
MOV voff+3*8[RBP],RCX
MOV voff+4*8[RBP],R8
MOV voff+5*8[RBP],R9
MOVZX EAX,AL // AL = 0..8, # of XMM registers used
SHL EAX,2 // 4 bytes for each MOVAPS
LEA RDX,offset L2[RIP]
SUB RDX,RAX
LEA RAX,voff+6*8+0x7F[RBP]
JMP EDX
MOVAPS -0x0F[RAX],XMM7 // only save XMM registers if actually used
MOVAPS -0x1F[RAX],XMM6
MOVAPS -0x2F[RAX],XMM5
MOVAPS -0x3F[RAX],XMM4
MOVAPS -0x4F[RAX],XMM3
MOVAPS -0x5F[RAX],XMM2
MOVAPS -0x6F[RAX],XMM1
MOVAPS -0x7F[RAX],XMM0
L2:
MOV 1[RAX],offset_regs // set __va_argsave.offset_regs
MOV 5[RAX],offset_fpregs // set __va_argsave.offset_fpregs
LEA RDX, Poff+Poffset[RBP]
MOV 9[RAX],RDX // set __va_argsave.stack_args
SUB RAX,6*8+0x7F // point to start of __va_argsave
MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args
*/
targ_size_t voff = Aoff + BPoff + sv->Soffset; // EBP offset of start of sv
const int vregnum = 6;
const unsigned vsize = vregnum * 8 + 8 * 16;
code *cv = CNIL;
static unsigned char regs[vregnum] = { DI,SI,DX,CX,R8,R9 };
if (!hasframe)
voff += EBPtoESP;
// Store the integer argument registers that did not carry a named
// argument
for (int i = 0; i < vregnum; i++)
{
unsigned r = regs[i];
if (!(mask[r] & namedargs)) // named args are already dealt with
{ unsigned ea = (REX_W << 16) | modregxrm(2,r,BPRM);
if (!hasframe)
ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
cv = genc1(cv,0x89,ea,FLconst,voff + i*8);
}
}
cv = genregs(cv,0x0FB6,AX,AX); // MOVZX EAX,AL
genc2(cv,0xC1,modregrm(3,4,AX),2); // SHL EAX,2
int raxoff = voff+6*8+0x7F;
// L2offset is a hard-coded byte distance to label L2 of the sequence
// above; it depends on whether raxoff is below -0x7F and on the
// presence of a sib byte
unsigned L2offset = (raxoff < -0x7F) ? 0x2C : 0x29;
if (!hasframe)
L2offset += 1; // +1 for sib byte
// LEA RDX,offset L2[RIP]
genc1(cv,0x8D,(REX_W << 16) | modregrm(0,DX,5),FLconst,L2offset);
genregs(cv,0x29,AX,DX); // SUB RDX,RAX
code_orrex(cv, REX_W);
// LEA RAX,voff+vsize-6*8-16+0x7F[RBP]
unsigned ea = (REX_W << 16) | modregrm(2,AX,BPRM);
if (!hasframe)
// add sib byte for [RSP] addressing
ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
genc1(cv,0x8D,ea,FLconst,raxoff);
gen2(cv,0xFF,modregrm(3,4,DX)); // JMP EDX
for (int i = 0; i < 8; i++)
{
// MOVAPS -15-16*i[RAX],XMM7-i
genc1(cv,0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
}
/* Compute offset_regs and offset_fpregs
*/
unsigned offset_regs = 0;
unsigned offset_fpregs = vregnum * 8;
// Each named argument register advances the matching offset past its
// slot; namedargs is consumed (cleared bit by bit) by this loop
for (int i = AX; i <= XMM7; i++)
{ regm_t m = mask[i];
if (m & namedargs)
{
if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
offset_regs += 8;
else if (m & XMMREGS)
offset_fpregs += 16;
namedargs &= ~m;
if (!namedargs)
break;
}
}
// MOV 1[RAX],offset_regs
genc(cv,0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);
// MOV 5[RAX],offset_fpregs
genc(cv,0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);
// LEA RDX, Poff+Poffset[RBP]
ea = modregrm(2,DX,BPRM);
if (!hasframe)
ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4);
// Note: this rounds the global Poffset up to a multiple of REGSIZE
Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
genc1(cv,0x8D,(REX_W << 16) | ea,FLconst,Poff + Poffset);
// MOV 9[RAX],RDX
genc1(cv,0x89,(REX_W << 16) | modregrm(2,DX,AX),FLconst,9);
// SUB RAX,6*8+0x7F // point to start of __va_argsave
genc2(cv,0x2D,0,6*8+0x7F);
code_orrex(cv, REX_W);
// MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args
genc1(cv,0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);
pinholeopt(cv, NULL);
useregs(mDX|mAX);
c = cat(c,cv);
}
}
#if 0 && TARGET_LINUX
if (gotref)
{ // position independent reference
c = cat(c, cod3_load_got());
}
#endif
return c;
}
/*******************************
* Generate the function epilog for block b and append it to b->Bcode.
* Mirrors prolog(): pops the saved registers, releases the locals,
* tears down the BP frame if there is one, and emits the return
* instruction when the block ends in BCret or BCretexp.
* When b does not have BFLepilog set, only the return instruction is
* generated. Nothing is generated for naked functions (mTYnaked).
* Input:
* b block to which the epilog is appended
* Output:
* retsize Size of function epilog
* spoff number of bytes popped before the locals are released
* (read back through cod3_spoff())
*/
static targ_size_t spoff;
void epilog(block *b)
{ code *c;
code *cr;
code *ce;
code *cpopds;
unsigned reg;
unsigned regx; // register that's not a return reg
regm_t topop,regm;
tym_t tyf,tym;
int op;
char farfunc;
targ_size_t xlocalsize = localsize;
c = CNIL;
ce = b->Bcode;
tyf = funcsym_p->ty();
tym = tybasic(tyf);
farfunc = tyfarfunc(tym);
// Note: this goto enters the BCret/BCretexp block near the end of the
// function, skipping the resets of spoff and retsize below
if (!(b->Bflags & BFLepilog)) // if no epilog code
goto Lret; // just generate RET
regx = (b->BC == BCret) ? AX : CX;
spoff = 0;
retsize = 0;
if (tyf & mTYnaked) // if no prolog/epilog
return;
if (tym == TYifunc)
{ static unsigned char ops2[] = { 0x07,0x1F,0x61,0xCF,0 };
// NOTE(review): 0x5B (POP BX) appears twice in ops0; the first one
// presumably discards the SP value pushed by 0x54 in prolog() -- confirm
static unsigned char ops0[] = { 0x07,0x1F,0x5F,0x5E,
0x5D,0x5B,0x5B,0x5A,
0x59,0x58,0xCF,0 };
unsigned char *p;
c = genregs(c,0x8B,SP,BP); // MOV SP,BP
// Emit the zero-terminated opcode table one byte at a time
p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0;
do
gen1(c,*p);
while (*++p);
goto Lopt;
}
// Same condition as the trace call in prolog()
if (config.flags & CFGtrace &&
(!(config.flags4 & CFG4allcomdat) ||
funcsym_p->Sclass == SCcomdat ||
funcsym_p->Sclass == SCglobal ||
(config.flags2 & CFG2comdat && SymInline(funcsym_p))
)
)
{
symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N];
makeitextern(s);
c = gencs(c,I16 ? 0x9A : CALL,0,FLfunc,s); // CALLF _trace
if (!I16)
code_orflag(c,CFoff | CFselfrel);
useregs((ALLREGS | mBP | mES) & ~s->Sregsaved);
}
if (usednteh & ~NTEHjmonitor && (config.exe == EX_NT || MARS))
c = cat(c,nteh_epilog());
cpopds = CNIL;
if (tyf & mTYloadds)
{ cpopds = gen1(cpopds,0x1F); // POP DS
c = cat(c,cpopds);
spoff += intsize;
}
/* Pop all the general purpose registers saved on the stack
* by the prolog code. Remember to do them in the reverse
* order they were pushed.
*/
// Walk downwards from the highest general purpose register
reg = I64 ? R15 : DI;
regm = 1 << reg;
topop = fregsaved & ~mfuncreg;
#ifdef DEBUG
if (topop & ~0xFFFF)
printf("fregsaved = x%x, mfuncreg = x%x\n",fregsaved,mfuncreg);
#endif
assert(!(topop & ~0xFFFF));
while (topop)
{ if (topop & regm)
{ c = gen1(c,0x58 + (reg & 7)); // POP reg
if (reg & 8)
code_orrex(c, REX_B);
topop &= ~regm;
spoff += REGSIZE;
}
regm >>= 1;
reg--;
}
#if MARS
if (usednteh & NTEHjmonitor)
{
regm_t retregs = 0;
if (b->BC == BCretexp)
retregs = regmask(b->Belem->Ety, tym);
code *cn = nteh_monitor_epilog(retregs);
c = cat(c,cn);
xlocalsize += 8;
}
#endif
if (config.wflags & WFwindows && farfunc)
{
int wflags = config.wflags;
if (wflags & WFreduced && !(tyf & mTYexport))
{ // reduced prolog/epilog for non-exported functions
wflags &= ~(WFdgroup | WFds | WFss);
// Without WFsaveds, fall into the ordinary frame teardown at L4
if (!(wflags & WFsaveds))
goto L4;
}
if (localsize | usedalloca)
{
c = genc1(c,0x8D,modregrm(1,SP,6),FLconst,(targ_uns)-2); /* LEA SP,-2[BP] */
}
if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
{ if (cpopds)
cpopds->Iop = NOP; // don't need previous one
c = gen1(c,0x1F); // POP DS
}
c = gen1(c,0x58 + BP); // POP BP
if (config.wflags & WFincbp)
gen1(c,0x48 + BP); // DEC BP
assert(hasframe);
}
else
{
if (needframe || (xlocalsize && hasframe))
{
L4:
assert(hasframe);
if (xlocalsize | usedalloca)
{ if (config.target_cpu >= TARGET_80286 &&
!(config.target_cpu >= TARGET_80386 &&
config.flags4 & CFG4speed)
)
c = gen1(c,0xC9); // LEAVE
// The leading 0 disables this branch
else if (0 && xlocalsize == REGSIZE && !usedalloca && I32)
{ // This doesn't work - I should figure out why
mfuncreg &= ~mask[regx];
c = gen1(c,0x58 + regx); // POP regx
c = gen1(c,0x58 + BP); // POP BP
}
else
{ c = genregs(c,0x8B,SP,BP); // MOV SP,BP
if (I64)
code_orrex(c, REX_W); // MOV RSP,RBP
c = gen1(c,0x58 + BP); // POP BP
}
}
else
c = gen1(c,0x58 + BP); // POP BP
if (config.wflags & WFincbp && farfunc)
gen1(c,0x48 + BP); // DEC BP
}
// No frame: release a single slot with a POP into regx, which is
// then marked as used in mfuncreg
else if (xlocalsize == REGSIZE && (!I16 || b->BC == BCret))
{ mfuncreg &= ~mask[regx];
c = gen1(c,0x58 + regx); // POP regx
}
else if (xlocalsize)
{
c = genc2(c,0x81,modregrm(3,0,SP),xlocalsize); // ADD SP,xlocalsize
if (I64)
code_orrex(c, REX_W);
}
}
if (b->BC == BCret || b->BC == BCretexp)
{
Lret:
// 0xCA / 0xC2 are the far / near RET imm16 forms; op+1 (0xCB / 0xC3)
// is the corresponding plain RET
op = tyfarfunc(tym) ? 0xCA : 0xC2;
if (tym == TYhfunc)
{
c = genc2(c,0xC2,0,4); // RET 4
}
else if (!typfunc(tym) || // if caller cleans the stack
Poffset == 0) // or nothing pushed on the stack anyway
{ op++; // to a regular RET
c = gen1(c,op);
}
else
{ // Stack is always aligned on register size boundary
// Note: this rounds the global Poffset up in place
Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
c = genc2(c,op,0,Poffset); // RET Poffset
}
}
Lopt:
// If last instruction in ce is ADD SP,imm, and first instruction
// in c sets SP, we can dump the ADD.
cr = code_last(ce);
if (cr && c && !I64)
{
if (cr->Iop == 0x81 && cr->Irm == modregrm(3,0,SP)) // if ADD SP,imm
{
if (
c->Iop == 0xC9 || // LEAVE
(c->Iop == 0x8B && c->Irm == modregrm(3,SP,BP)) || // MOV SP,BP
(c->Iop == 0x8D && c->Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP]
)
cr->Iop = NOP;
else if (c->Iop == 0x58 + BP) // if POP BP
{ cr->Iop = 0x8B;
cr->Irm = modregrm(3,SP,BP); // MOV SP,BP
}
}
#if 0 // These optimizations don't work if the called function
// cleans off the stack.
else if (c->Iop == 0xC3 && cr->Iop == CALL) // CALL near
{ cr->Iop = 0xE9; // JMP near
c->Iop = NOP;
}
else if (c->Iop == 0xCB && cr->Iop == 0x9A) // CALL far
{ cr->Iop = 0xEA; // JMP far
c->Iop = NOP;
}
#endif
}
retsize += calcblksize(c); // compute size of function epilog
b->Bcode = cat(ce,c);
}
/*******************************
 * Return offset of SP from BP.
 * Returns:
 *      the sum of the current spoff and localsize values
 */
targ_size_t cod3_spoff()
{
    const targ_size_t sp_from_bp = spoff + localsize;
    return sp_from_bp;
}
/**********************************
 * Load value of _GLOBAL_OFFSET_TABLE_ into EBX.
 * Only implemented for the ELF/Mach-O style targets listed below; on any
 * other target this asserts.
 * Returns:
 *      code list:  CALL L1 / L1: POP EBX / ADD EBX,_GLOBAL_OFFSET_TABLE_+3
 */
code *cod3_load_got()
{
#if !(TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS)
    assert(0);
    return NULL;
#else
    // Fetch the current instruction address into EBX
    code *cpc = genc2(NULL,CALL,0,0);           //     CALL L1
    gen1(cpc, 0x58 + BX);                       // L1: POP EBX

    // Turn it into the GOT address
    symbol *got = elfobj_getGOTsym();
    code *cadd = gencs(CNIL,0x81,0xC3,FLextern,got);    // ADD EBX,_GLOBAL_OFFSET_TABLE_+3
    cadd->Iflags = CFoff;
    cadd->IEVoffset2 = 3;

    makeitextern(got);
    return cat(cpc,cadd);
#endif
}
/*******************************
 * Generate code that moves symbol s between its assigned register(s)
 * and its memory location.
 * Input:
 *      s       symbol whose Sreglsw (and Sregmsw for values wider than
 *              REGSIZE) names the register(s)
 *      toreg   true:  load register from memory
 *              false: store register to memory
 * Returns:
 *      generated code list
 */
code* gen_spill_reg(Symbol* s, bool toreg)
{
    code cs;
    code *cresult;
    const regm_t keep = toreg ? RMload : RMstore;
    const int size = type_size(s->Stype);

    // A variable elem lets getlvalue() build the addressing mode for us
    elem *evar = el_var(s);

    if (mask[s->Sreglsw] & XMMREGS)
    {
        // XMM register: MOVSS/D xreg,mem for a load, MOVSS/D mem,xreg for a store
        cs.Iop = toreg ? xmmload(s->Stype->Tty)
                       : xmmstore(s->Stype->Tty);
        cresult = getlvalue(&cs,evar,keep);
        cs.orReg(s->Sreglsw - XMM0);
        cresult = gen(cresult,&cs);
    }
    else
    {
        // MOV reg,mem[ESP] for a load, MOV mem[ESP],reg for a store
        unsigned opcode = toreg ? 0x8B : 0x89;
        if (size == 1)
            opcode ^= 1;                        // byte form of the MOV
        cs.Iop = opcode;

        cresult = getlvalue(&cs,evar,keep);
        cs.orReg(s->Sreglsw);
        if (I64 && size == 1 && s->Sreglsw >= 4)
            cs.Irex |= REX;
        cresult = gen(cresult,&cs);

        if (size > REGSIZE)
        {
            // Second move for the most significant half
            cs.setReg(s->Sregmsw);
            getlvalue_msw(&cs);
            cresult = gen(cresult,&cs);
        }
    }

    el_free(evar);
    return cresult;
}
/****************************
 * Generate code for, and output a thunk.
 * The thunk first adjusts the 'this' pointer by d, then transfers control
 * with a JMP: directly to sfunc, or indirectly through the vtbl[] entry at
 * offset i when a virtual call is requested.
 * Input:
 * sthunk symbol for the thunk; its Soffset, Ssize and Sseg are set here
 * sfunc target function for the direct (non-virtual) case
 * thisty Type of this pointer
 * p ESP parameter offset to this pointer
 * d offset to add to 'this' pointer
 * d2 offset from 'this' to vptr
 * i offset into vtbl[]; (i & 0xFFFF) == 0xFFFF selects the direct call
 */
void cod3_thunk(symbol *sthunk,symbol *sfunc,unsigned p,tym_t thisty,
targ_size_t d,int i,targ_size_t d2)
{ code *c,*c1;
targ_size_t thunkoffset;
tym_t thunkty;
cod3_align();
/* Skip over return address */
thunkty = tybasic(sthunk->ty());
#if TARGET_SEGMENTED
if (tyfarfunc(thunkty))
p += I32 ? 8 : tysize[TYfptr]; /* far function */
else
#endif
p += tysize[TYnptr];
// Part 1: emit the adjustment of 'this' by d.
if (!I16)
{
/*
Generate:
ADD p[ESP],d
For direct call:
JMP sfunc
For virtual call:
MOV EAX, p[ESP] EAX = this
MOV EAX, d2[EAX] EAX = this->vptr
JMP i[EAX] jump to virtual function
*/
// reg is the /digit field of opcode 0x81: 0 selects ADD, 5 selects SUB
unsigned reg = 0;
if ((targ_ptrdiff_t)d < 0)
{
d = -d;
reg = 5; // switch from ADD to SUB
}
if (thunkty == TYmfunc)
{ // ADD ECX,d
c = CNIL;
if (d)
c = genc2(c,0x81,modregrm(3,reg,CX),d);
}
else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
{ // ADD EAX,d
// (the register used is DI rather than AX when I64)
c = CNIL;
if (d)
c = genc2(c,0x81,modregrm(3,reg,I64 ? DI : AX),d);
}
else
{
c = genc(CNIL,0x81,modregrm(2,reg,4),
FLconst,p, // to this
FLconst,d); // ADD p[ESP],d
c->Isib = modregrm(0,4,SP);
}
// c is CNIL here when d == 0 in the two register cases above
if (I64 && c)
c->Irex |= REX_W;
}
else
{
/*
Generate:
MOV BX,SP
ADD [SS:] p[BX],d
For direct call:
JMP sfunc
For virtual call:
MOV BX, p[BX] BX = this
MOV BX, d2[BX] BX = this->vptr
JMP i[BX] jump to virtual function
*/
c = genregs(CNIL,0x89,SP,BX); /* MOV BX,SP */
c1 = genc(CNIL,0x81,modregrm(2,0,7),
FLconst,p, /* to this */
FLconst,d); /* ADD p[BX],d */
if (config.wflags & WFssneds ||
// If DS needs reloading from SS,
// then assume SS != DS on thunk entry
(config.wflags & WFss && LARGEDATA))
c1->Iflags |= CFss; /* SS: */
c = cat(c,c1);
}
// Part 2: emit the transfer of control.
if ((i & 0xFFFF) != 0xFFFF) /* if virtual call */
{ code *c2,*c3;
// 'this' (and therefore the vptr) is far when it is wider than a register
#define FARTHIS (tysize(thisty) > REGSIZE)
#define FARVPTR FARTHIS
#if TARGET_SEGMENTED
assert(thisty != TYvptr); /* can't handle this case */
#endif
if (!I16)
{
assert(!FARTHIS && !LARGECODE);
if (thunkty == TYmfunc) // if 'this' is in ECX
{ c1 = CNIL;
// MOV EAX,d2[ECX]
c2 = genc1(CNIL,0x8B,modregrm(2,AX,CX),FLconst,d2);
}
else if (thunkty == TYjfunc) // if 'this' is in EAX
{ c1 = CNIL;
// MOV EAX,d2[EAX]
c2 = genc1(CNIL,0x8B,modregrm(2,AX,AX),FLconst,d2);
}
else
{
// NOTE(review): for I64 with TYnfunc the adjustment above was applied
// to DI, yet this path reloads 'this' from p[ESP] - confirm intended.
// MOV EAX,p[ESP]
c1 = genc1(CNIL,0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,(targ_uns) p);
if (I64)
c1->Irex |= REX_W;
// MOV EAX,d2[EAX]
c2 = genc1(CNIL,0x8B,modregrm(2,AX,AX),FLconst,d2);
}
if (I64)
code_orrex(c2, REX_W);
/* JMP i[EAX] */
c3 = genc1(CNIL,0xFF,modregrm(2,4,0),FLconst,(targ_uns) i);
}
else
{
/* MOV/LES BX,[SS:] p[BX] */
c1 = genc1(CNIL,(FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,(targ_uns) p);
if (config.wflags & WFssneds ||
// If DS needs reloading from SS,
// then assume SS != DS on thunk entry
(config.wflags & WFss && LARGEDATA))
c1->Iflags |= CFss; /* SS: */
/* MOV/LES BX,[ES:]d2[BX] */
c2 = genc1(CNIL,(FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
if (FARTHIS)
c2->Iflags |= CFes; /* ES: */
/* JMP i[BX] */
c3 = genc1(CNIL,0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,(targ_uns) i);
if (FARVPTR)
c3->Iflags |= CFes; /* ES: */
}
c = cat4(c,c1,c2,c3);
}
else
{
// Direct call: far JMP (0xEA) for LARGECODE, otherwise self-relative near JMP (0xE9)
c1 = gencs(CNIL,(LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); /* JMP sfunc */
c1->Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
c = cat(c,c1);
}
// Part 3: optimize, write out and release the code, then record where the
// thunk landed.  Coffset is sampled before codout() so the size can be
// computed from the difference afterwards.
thunkoffset = Coffset;
pinholeopt(c,NULL);
codout(c);
code_free(c);
sthunk->Soffset = thunkoffset;
sthunk->Ssize = Coffset - thunkoffset; /* size of thunk */
sthunk->Sseg = cseg;
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
objpubdef(cseg,sthunk,sthunk->Soffset);
#endif
searchfixlist(sthunk); /* resolve forward refs */
}
/*****************************
 * Assume symbol s is extern.
 * If s has no external number yet (Sxtrnnum == 0), its storage class is
 * set to SCextern and it is handed to objextern(); otherwise nothing is done.
 */
void makeitextern(symbol *s)
{
    if (s->Sxtrnnum != 0)
        return;

    s->Sclass = SCextern;               /* external */
    /*printf("makeitextern(x%x)\n",s);*/
    objextern(s);
}
/*******************************
 * Replace JMPs in Bgotocode with JMP SHORTs wherever possible.
 * This routine depends on FLcode jumps to only be forward
 * referenced.
 * BFLjmpoptdone is set to TRUE if nothing more can be done
 * with this block.
 * Only JMP and conditional jumps (0x70..0x7F) flagged CFjmp16 are
 * considered; jumps whose displacement is 0 are turned into NOPs.
 * Input:
 * bl block whose Bcode list is scanned and modified in place
 * flag !=0 means don't have correct Boffsets yet
 * Returns:
 * number of bytes saved
 */
int branch(block *bl,int flag)
{ int bytesaved;
code *c,*cn,*ct;
targ_size_t offset,disp;
targ_size_t csize;
// Optimistically mark the block as finished; the flag is cleared again
// below whenever a jump has to stay in its long form.
if (!flag)
bl->Bflags |= BFLjmpoptdone; // assume this will be all
c = bl->Bcode;
if (!c)
return 0;
bytesaved = 0;
offset = bl->Boffset; /* offset of start of block */
// Walk the instruction list; offset tracks the address of instruction c.
while (1)
{ unsigned char op;
csize = calccodsize(c);
cn = code_next(c);
op = c->Iop;
if ((op & ~0x0F) == 0x70 && c->Iflags & CFjmp16 ||
op == JMP)
{
// L1 is re-entered after a jump-to-jump has been retargeted
L1:
switch (c->IFL2)
{
case FLblock:
if (flag) // no offsets yet, don't optimize
goto L3;
// Displacement is measured from the end of this instruction
disp = c->IEV2.Vblock->Boffset - offset - csize;
/* If this is a forward branch, and there is an aligned
* block intervening, it is possible that shrinking
* the jump instruction will cause it to be out of
* range of the target. This happens if the alignment
* prevents the target block from moving correspondingly
* closer.
*/
if (disp >= 0x7F-4 && c->IEV2.Vblock->Boffset > offset)
{ /* Look for intervening alignment
*/
for (block *b = bl->Bnext; b; b = b->Bnext)
{
if (b->Balign)
{
bl->Bflags &= ~BFLjmpoptdone; // some JMPs left
goto L3;
}
if (b == c->IEV2.Vblock)
break;
}
}
break;
case FLcode:
{ code *cr;
disp = 0;
ct = c->IEV2.Vcode; /* target of branch */
assert(ct->Iflags & (CFtarg | CFtarg2));
// Forward search: sum the sizes of the instructions between c and ct
for (cr = cn; cr; cr = code_next(cr))
{
if (cr == ct)
break;
disp += calccodsize(cr);
}
if (!cr)
{ // Didn't find it in forward search. Try backwards jump
// s becomes 1 once ct has been seen; from then on sizes are summed
// up to (but not including) cn
int s = 0;
disp = 0;
for (cr = bl->Bcode; cr != cn; cr = code_next(cr))
{
assert(cr != NULL); // must have found it
if (cr == ct)
s = 1;
if (s)
disp += calccodsize(cr);
}
}
if (config.flags4 & CFG4optimized && !flag)
{
/* Propagate branch forward past junk */
while (1)
{ if (ct->Iop == NOP ||
ct->Iop == (ESCAPE | ESClinnum))
{ ct = code_next(ct);
if (!ct)
goto L2;
}
else
{ c->IEV2.Vcode = ct;
ct->Iflags |= CFtarg;
break;
}
}
/* And eliminate jmps to jmps */
if ((op == ct->Iop || ct->Iop == JMP) &&
(op == JMP || c->Iflags & CFjmp16))
{ c->IFL2 = ct->IFL2;
c->IEV2.Vcode = ct->IEV2.Vcode;
/*printf("eliminating branch\n");*/
goto L1;
}
L2: ;
}
}
break;
default:
goto L3;
}
if (disp == 0) // bra to next instruction
{ bytesaved += csize;
c->Iop = NOP; // del branch instruction
c->IEV2.Vcode = NULL;
// offset is deliberately not advanced: the deleted jump occupies no bytes
c = cn;
if (!c)
break;
continue;
}
// Both disp and disp-2 must survive a round trip through a signed char
else if ((targ_size_t)(targ_schar)(disp - 2) == (disp - 2) &&
(targ_size_t)(targ_schar)disp == disp)
{
if (op == JMP)
{ c->Iop = JMPS; // JMP SHORT
bytesaved += I16 ? 1 : 3;
}
else // else Jcond
{ c->Iflags &= ~CFjmp16; // a branch is ok
bytesaved += I16 ? 3 : 4;
// Replace a cond jump around a call to a function that
// never returns with a cond jump to that function.
if (config.flags4 & CFG4optimized &&
config.target_cpu >= TARGET_80386 &&
disp == (I16 ? 3 : 5) &&
cn &&
cn->Iop == CALL &&
cn->IFL2 == FLfunc &&
cn->IEVsym2->Sflags & SFLexit &&
!(cn->Iflags & (CFtarg | CFtarg2))
)
{
// The CALL becomes a 0F 8x conditional jump with the inverted condition
cn->Iop = 0x0F00 | ((c->Iop & 0x0F) ^ 0x81);
c->Iop = NOP;
c->IEV2.Vcode = NULL;
bytesaved++;
// If nobody else points to ct, we can remove the CFtarg
if (flag && ct)
{ code *cx;
for (cx = bl->Bcode; 1; cx = code_next(cx))
{
if (!cx)
{ ct->Iflags &= ~CFtarg;
break;
}
if (cx->IEV2.Vcode == ct)
break;
}
}
}
}
// The instruction changed form, so recompute its size for the offset update
csize = calccodsize(c);
}
else
bl->Bflags &= ~BFLjmpoptdone; // some JMPs left
}
// L3: advance to the next instruction, or finish at the end of the list
L3:
if (cn)
{ offset += csize;
c = cn;
}
else
break;
}
//printf("bytesaved = x%x\n",bytesaved);
return bytesaved;
}
/************************************************
 * Adjust all Soffset's of stack variables so they
 * are all relative to the frame pointer.
 * Parameters (SCparameter, SCregpar) are biased by Poff; locals
 * (SCauto, SCfastpar, SCregister) are biased by Aoff + BPoff.
 * Every other storage class, including SCbprel, is left untouched.
 */
#if MARS
void cod3_adjSymOffsets()
{
    //printf("cod3_adjSymOffsets()\n");
    for (SYMIDX idx = 0; idx < globsym.top; ++idx)
    {
        symbol *sym = globsym.tab[idx];

        switch (sym->Sclass)
        {
            case SCparameter:
            case SCregpar:
                sym->Soffset += Poff;
                break;

            case SCauto:
            case SCfastpar:
            case SCregister:
                sym->Soffset += Aoff + BPoff;
                break;

            default:
                // SCbprel and anything else: offset stays as it is
                break;
        }
    }
}
#endif
/*******************************
 * Take symbol info in union ev and replace it with a real address
 * in Vpointer.
 * Runs assignaddrc() over the block's code.  For a block flagged
 * BFLoutsideprolog the addresses are resolved as if there were no frame
 * (hasframe = 0, EBPtoESP = -REGSIZE); both globals are restored afterwards.
 */
void assignaddr(block *bl)
{
    const int savedEBPtoESP = EBPtoESP;
    const int savedhasframe = hasframe;
    const bool outsideprolog = (bl->Bflags & BFLoutsideprolog) != 0;

    if (outsideprolog)
    {
        EBPtoESP = -REGSIZE;
        hasframe = 0;
    }

    assignaddrc(bl->Bcode);

    // Put the globals back the way we found them
    hasframe = savedhasframe;
    EBPtoESP = savedEBPtoESP;
}
/*******************************
 * Walk the code list c and resolve symbolic operand references.
 * For each instruction the first operand (IFL1/IEV1) and then the second
 * operand (IFL2/IEV2) are examined.  Frame relative kinds (FLauto, FLpara,
 * FLtmp, ...) get their final offset added into IEVpointer1/IEVpointer2 and
 * are then marked FLconst.  When there is no frame (!hasframe), EBP relative
 * first operands are re-encoded as ESP relative.
 * ESCAPE pseudo-instructions are processed here as well: ESCadjesp adjusts
 * the running EBPtoESP value and is turned into a NOP, ESCframeptr is
 * replaced by a load of the frame pointer.
 * Input:
 * c start of the code list (may be NULL)
 */
void assignaddrc(code *c)
{
int sn;
symbol *s;
unsigned char ins,rm;
targ_size_t soff;
targ_size_t base;
// Value of EBPtoESP on entry; FLstack operands are adjusted by how far
// EBPtoESP has moved from this since.
base = EBPtoESP;
for (; c; c = code_next(c))
{
#ifdef DEBUG
if (0)
{ printf("assignaddrc()\n");
c->print();
}
// Catch a two-element cycle in the code list
if (code_next(c) && code_next(code_next(c)) == c)
assert(0);
#endif
// Look up the instruction attribute bits (M: has modregrm, T: has second operand)
if (c->Iflags & CFvex)
ins = vex_inssize(c);
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
ins = inssize2[(c->Iop >> 8) & 0xFF];
else if ((c->Iop & 0xFF00) == 0x0F00)
ins = inssize2[c->Iop & 0xFF];
else if ((c->Iop & 0xFF) == ESCAPE)
{
if (c->Iop == (ESCAPE | ESCadjesp))
{
//printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV2.Vint);
// The adjustment is taken from IEV2.Vint, the same field the trace
// line above reports (previously IEV1.Vint was read here).
EBPtoESP += c->IEV2.Vint;
c->Iop = NOP;
}
if (c->Iop == (ESCAPE | ESCframeptr))
{ // Convert to load of frame pointer
// c->Irm is the register to use
if (hasframe)
{ // MOV reg,EBP
c->Iop = 0x89;
if (c->Irm & 8)
c->Irex |= REX_B;
c->Irm = modregrm(3,BP,c->Irm & 7);
}
else
{ // LEA reg,EBPtoESP[ESP]
c->Iop = 0x8D;
if (c->Irm & 8)
c->Irex |= REX_R;
c->Irm = modregrm(2,c->Irm & 7,4);
c->Isib = modregrm(0,4,SP);
c->Iflags = CFoff;
c->IFL1 = FLconst;
c->IEV1.Vuns = EBPtoESP;
}
}
if (I64)
c->Irex |= REX_W;
continue;
}
else
ins = inssize[c->Iop & 0xFF];
// ---- First operand ----
// Skip it when there is no modregrm byte, when the EA is a register
// (mod == 3), or when mod == 0 carries no displacement.
if (!(ins & M) ||
((rm = c->Irm) & 0xC0) == 0xC0)
goto do2; /* if no first operand */
if (is32bitaddr(I32,c->Iflags))
{
if (
((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5))
)
goto do2; /* if no first operand */
}
else
{
if (
((rm & 0xC0) == 0 && !((rm & 7) == 6))
)
goto do2; /* if no first operand */
}
s = c->IEVsym1;
switch (c->IFL1)
{
#if OMFOBJ
case FLdata:
if (s->Sclass == SCcomdat)
{ c->IFL1 = FLextern;
goto do2;
}
#if MARS
c->IEVseg1 = s->Sseg;
#else
c->IEVseg1 = DATA;
#endif
c->IEVpointer1 += s->Soffset;
c->IFL1 = FLdatseg;
goto do2;
case FLudata:
#if MARS
c->IEVseg1 = s->Sseg;
#else
c->IEVseg1 = UDATA;
#endif
c->IEVpointer1 += s->Soffset;
c->IFL1 = FLdatseg;
goto do2;
#else // don't lose symbol information
case FLdata:
case FLudata:
case FLtlsdata:
c->IFL1 = FLextern;
goto do2;
#endif
case FLdatseg:
c->IEVseg1 = DATA;
goto do2;
#if TARGET_SEGMENTED
case FLfardata:
case FLcsdata:
#endif
case FLpseudo:
goto do2;
case FLstack:
//printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
//s->Soffset,EBPtoESP,base,c->IEVpointer1);
c->IEVpointer1 += s->Soffset + EBPtoESP - base - EEoffset;
break;
case FLreg:
case FLauto:
soff = Aoff;
// L1: shared tail for FLauto/FLreg, FLpara and FLtmp; soff is the
// section offset selected by the case that jumped here
L1:
if (s->Sflags & SFLunambig && !(s->Sflags & SFLread) && // if never loaded
!anyiasm &&
// if not optimized, leave it in for debuggability
(config.flags4 & CFG4optimized || !config.fulltypes))
{ c->Iop = NOP; // remove references to it
continue;
}
if (s->Sfl == FLreg && c->IEVpointer1 < 2)
{ int reg = s->Sreglsw;
// Symbol lives in a register: rewrite the EA as that register
assert(!(s->Sregm & ~mask[reg]));
if (c->IEVpointer1 == 1)
{ assert(reg < 4); /* must be a BYTEREGS */
reg |= 4; /* convert to high byte reg */
}
if (reg & 8)
{ assert(I64);
c->Irex |= REX_B;
reg &= 7;
}
c->Irm = (c->Irm & modregrm(0,7,0))
| modregrm(3,0,reg);
assert(c->Iop != LES && c->Iop != LEA);
goto do2;
}
else
{ c->IEVpointer1 += s->Soffset + soff + BPoff;
if (s->Sflags & SFLunambig)
c->Iflags |= CFunambig;
// L2: shared tail for every frame relative first operand
L2:
if (!hasframe)
{ /* Convert to ESP relative address instead of EBP */
unsigned char rm;
assert(!I16);
c->IEVpointer1 += EBPtoESP;
rm = c->Irm;
if ((rm & 7) == 4) // if SIB byte
{
assert((c->Isib & 7) == BP);
assert((rm & 0xC0) != 0);
c->Isib = (c->Isib & ~7) | modregrm(0,0,SP);
}
else
{
assert((rm & 7) == 5);
c->Irm = (rm & modregrm(0,7,0))
| modregrm(2,0,4);
c->Isib = modregrm(0,4,SP);
}
}
}
break;
case FLpara:
soff = Poff - BPoff; // cancel out add of BPoff
goto L1;
case FLtmp:
soff = Toff;
goto L1;
case FLfltreg:
c->IEVpointer1 += Foff + BPoff;
c->Iflags |= CFunambig;
goto L2;
case FLallocatmp:
c->IEVpointer1 += AAoff + BPoff;
goto L2;
case FLbprel:
c->IEVpointer1 += s->Soffset;
break;
case FLcs:
sn = c->IEV1.Vuns;
if (!CSE_loaded(sn)) // if never loaded
{ c->Iop = NOP;
continue;
}
c->IEVpointer1 = sn * REGSIZE + CSoff + BPoff;
c->Iflags |= CFunambig;
goto L2;
case FLregsave:
sn = c->IEV1.Vuns;
c->IEVpointer1 = sn + regsave.off + BPoff;
c->Iflags |= CFunambig;
goto L2;
case FLndp:
#if MARS
assert(c->IEV1.Vuns < NDP::savetop);
#endif
c->IEVpointer1 = c->IEV1.Vuns * NDPSAVESIZE + NDPoff + BPoff;
c->Iflags |= CFunambig;
goto L2;
case FLoffset:
break;
case FLlocalsize:
c->IEVpointer1 += localsize;
break;
case FLconst:
default:
goto do2;
}
// Every case that leaves the switch with 'break' now holds a plain constant
c->IFL1 = FLconst;
// ---- Second operand ----
do2:
/* Ignore TEST (F6 and F7) opcodes */
if (!(ins & T)) goto done; /* if no second operand */
s = c->IEVsym2;
switch (c->IFL2)
{
#if ELFOBJ || MACHOBJ
case FLdata:
case FLudata:
case FLtlsdata:
// Re-dispatching on FLextern lands in the default case below
c->IFL2 = FLextern;
goto do2;
#else
case FLdata:
if (s->Sclass == SCcomdat)
{ c->IFL2 = FLextern;
goto do2;
}
#if MARS
c->IEVseg2 = s->Sseg;
#else
c->IEVseg2 = DATA;
#endif
c->IEVpointer2 += s->Soffset;
c->IFL2 = FLdatseg;
goto done;
case FLudata:
#if MARS
c->IEVseg2 = s->Sseg;
#else
c->IEVseg2 = UDATA;
#endif
c->IEVpointer2 += s->Soffset;
c->IFL2 = FLdatseg;
goto done;
#endif
case FLdatseg:
c->IEVseg2 = DATA;
goto done;
#if TARGET_SEGMENTED
case FLcsdata:
case FLfardata:
goto done;
#endif
case FLreg:
case FLpseudo:
assert(0);
/* NOTREACHED */
case FLauto:
c->IEVpointer2 += s->Soffset + Aoff + BPoff;
break;
case FLpara:
c->IEVpointer2 += s->Soffset + Poff;
break;
case FLtmp:
c->IEVpointer2 += s->Soffset + Toff + BPoff;
break;
case FLfltreg:
c->IEVpointer2 += Foff + BPoff;
break;
case FLallocatmp:
c->IEVpointer2 += AAoff + BPoff;
break;
case FLbprel:
c->IEVpointer2 += s->Soffset;
break;
case FLstack:
c->IEVpointer2 += s->Soffset + EBPtoESP - base;
break;
case FLcs:
case FLndp:
case FLregsave:
assert(0);
/* NOTREACHED */
case FLconst:
break;
case FLlocalsize:
c->IEVpointer2 += localsize;
break;
default:
goto done;
}
c->IFL2 = FLconst;
done:
;
}
}
/*******************************
 * Return offset from BP of symbol s.
 * Only symbols whose Sfl is FLpara, FLauto or FLtmp are supported; any
 * other kind asserts.  A frame must be present (hasframe).
 */
targ_size_t cod3_bpoffset(symbol *s)
{
    symbol_debug(s);

    targ_size_t bpoff = s->Soffset;

    if (s->Sfl == FLpara)
        bpoff += Poff;
    else if (s->Sfl == FLauto)
        bpoff += Aoff + BPoff;
    else if (s->Sfl == FLtmp)
        bpoff += Toff + BPoff;
    else
    {
#ifdef DEBUG
        WRFL((enum FL)s->Sfl);
        symbol_print(s);
#endif
        assert(0);
    }

    assert(hasframe);
    return bpoff;
}
/*******************************
 * Find shorter versions of the same instructions.
 * Does these optimizations:
 * replaces jmps to the next instruction with NOPs
 * sign extension of modregrm displacement
 * sign extension of immediate data (can't do it for OR, AND, XOR
 * as the opcodes are not defined)
 * short versions for AX EA
 * short versions for reg EA
 * Instructions are rewritten in place; after most rewrites the same
 * instruction is examined again (goto L1) so further rules can apply.
 * Input:
 * c start of the code list to optimize
 * b -> block for code (or NULL)
 */
void pinholeopt(code *c,block *b)
{ targ_size_t a;
unsigned op,mod;
unsigned char ins;
int usespace;
int useopsize;
int space;
block *bn;
#ifdef DEBUG
// Run the self test once, on the first call
static int tested; if (!tested) { tested++; pinholeopt_unittest(); }
#endif
#if 0
code *cstart = c;
if (debugc)
{
printf("+pinholeopt(%p)\n",c);
}
#endif
// usespace:  optimizing for size is requested (never for BCasm blocks)
// useopsize: toggling the operand size prefix is acceptable (always when
//            I16, otherwise only when optimizing for size)
if (b)
{ bn = b->Bnext;
usespace = (config.flags4 & CFG4space && b->BC != BCasm);
useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm));
}
else
{ bn = NULL;
usespace = (config.flags4 & CFG4space);
useopsize = (I16 || config.flags4 & CFG4space);
}
for (; c; c = code_next(c))
{
// L1: (re)examine instruction c after it has been rewritten
L1:
op = c->Iop;
if (c->Iflags & CFvex)
ins = vex_inssize(c);
else if ((op & 0xFFFD00) == 0x0F3800)
ins = inssize2[(op >> 8) & 0xFF];
else if ((op & 0xFF00) == 0x0F00)
ins = inssize2[op & 0xFF];
else
ins = inssize[op & 0xFF];
if (ins & M) // if modregrm byte
// shortop is nonzero when the effective operand size is 16 bits;
// local_BPRM is the rm encoding of BP for the effective address size.
{ int shortop = (c->Iflags & CFopsize) ? !I16 : I16;
int local_BPRM = BPRM;
if (c->Iflags & CFaddrsize)
local_BPRM ^= 5 ^ 6; // toggle between 5 and 6
unsigned rm = c->Irm;
unsigned reg = rm & modregrm(0,7,0); // isolate reg field
unsigned ereg = rm & 7;
//printf("c = %p, op = %02x rm = %02x\n", c, op, rm);
/* If immediate second operand */
if ((ins & T ||
((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
) &&
c->IFL2 == FLconst)
{
int flags = c->Iflags & CFpsw; /* if want result in flags */
// u is the immediate, narrowed to the width the instruction actually uses
targ_long u = c->IEV2.Vuns;
if (ins & E)
u = (signed char) u;
else if (shortop)
u = (short) u;
// Replace CMP reg,0 with TEST reg,reg
if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm
rm >= modregrm(3,7,AX) &&
u == 0)
{ c->Iop = (op & 1) | 0x84;
c->Irm = modregrm(3,ereg,ereg);
if (c->Irex & REX_B)
c->Irex |= REX_R;
goto L1;
}
/* Optimize ANDs with an immediate constant */
if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
{
if (rm >= modregrm(3,4,AX)) // AND reg,imm
{
if (u == 0)
{ /* Replace with XOR reg,reg */
c->Iop = 0x30 | (op & 1);
c->Irm = modregrm(3,ereg,ereg);
if (c->Irex & REX_B)
c->Irex |= REX_R;
goto L1;
}
// AND with all ones changes nothing unless the flags are wanted
if (u == 0xFFFFFFFF && !flags)
{ c->Iop = NOP;
goto L1;
}
}
if (op == 0x81 && !flags)
{ // If we can do the operation in one byte
// If EA is not SI or DI
if ((rm < modregrm(3,4,SP) || I64) &&
(config.flags4 & CFG4space ||
config.target_cpu < TARGET_PentiumPro)
)
{
// Only the low byte has bits to clear: jump to L2 (byte instruction)
if ((u & 0xFFFFFF00) == 0xFFFFFF00)
goto L2;
// Only the second byte has bits to clear: jump to L3 (high byte form)
else if (rm < modregrm(3,0,0) || (!c->Irex && ereg < 4))
{ if (!shortop)
{ if ((u & 0xFFFF00FF) == 0xFFFF00FF)
goto L3;
}
else
{
if ((u & 0xFF) == 0xFF)
goto L3;
}
}
}
if (!shortop && useopsize)
{
// Upper 16 bits all ones: a 16 bit AND does the same job
if ((u & 0xFFFF0000) == 0xFFFF0000)
{ c->Iflags ^= CFopsize;
goto L1;
}
// Lower 16 bits all ones and a memory operand: AND only the upper word
if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
{ c->IEVoffset1 += 2; /* address MSW */
c->IEV2.Vuns >>= 16;
c->Iflags ^= CFopsize;
goto L1;
}
if (rm >= modregrm(3,4,AX))
{
if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64))
{ c->Iop = 0x0FB6; // MOVZX
c->Irm = modregrm(3,ereg,ereg);
if (c->Irex & REX_B)
c->Irex |= REX_R;
goto L1;
}
if (u == 0xFFFF)
{ c->Iop = 0x0FB7; // MOVZX
c->Irm = modregrm(3,ereg,ereg);
if (c->Irex & REX_B)
c->Irex |= REX_R;
goto L1;
}
}
}
}
}
/* Look for ADD,OR,SUB,XOR with u that we can eliminate */
if (!flags &&
(op == 0x81 || op == 0x80) &&
(reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR
reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR
)
{
if (u == 0)
{
c->Iop = NOP;
goto L1;
}
if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */
{
// The reg field is flipped from 6 (XOR) to 2 for the NOT encoding
c->Iop = 0xF6 | (op & 1); /* NOT */
c->Irm ^= modregrm(0,6^2,0);
goto L1;
}
// OR/XOR whose immediate fits in 16 bits: use the 16 bit form
if (!shortop &&
useopsize &&
op == 0x81 &&
(u & 0xFFFF0000) == 0 &&
(reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
{ c->Iflags ^= CFopsize;
goto L1;
}
}
/* Look for TEST or OR or XOR with an immediate constant */
/* that we can replace with a byte operation */
if (op == 0xF7 && reg == modregrm(0,0,0) ||
op == 0x81 && reg == modregrm(0,6,0) && !flags ||
op == 0x81 && reg == modregrm(0,1,0))
{
// See if we can replace a dword with a word
// (avoid for 32 bit instructions, because CFopsize
// is too slow)
if (!shortop && useopsize)
{ if ((u & 0xFFFF0000) == 0)
{ c->Iflags ^= CFopsize;
goto L1;
}
/* If memory (not register) addressing mode */
if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
{ c->IEVoffset1 += 2; /* address MSW */
c->IEV2.Vuns >>= 16;
c->Iflags ^= CFopsize;
goto L1;
}
}
// If EA is not SI or DI
if (rm < (modregrm(3,0,SP) | reg) &&
(usespace ||
config.target_cpu < TARGET_PentiumPro)
)
{
if ((u & 0xFFFFFF00) == 0)
{
// L2: switch to the byte form of the opcode (also reached from the AND rules)
L2: c->Iop--; /* to byte instruction */
c->Iflags &= ~CFopsize;
goto L1;
}
if (((u & 0xFFFF00FF) == 0 ||
(shortop && (u & 0xFF) == 0)) &&
(rm < modregrm(3,0,0) || (!c->Irex && ereg < 4)))
{
// L3: operate on the second byte instead: shift the immediate down and
// address the high byte register or the next memory byte
L3:
c->IEV2.Vuns >>= 8;
if (rm >= (modregrm(3,0,AX) | reg))
c->Irm |= 4; /* AX->AH, BX->BH, etc. */
else
c->IEVoffset1 += 1;
goto L2;
}
}
#if 0
// BUG: which is right?
else if ((u & 0xFFFF0000) == 0)
#else
else if (0 && op == 0xF7 &&
rm >= modregrm(3,0,SP) &&
(u & 0xFFFF0000) == 0)
#endif
c->Iflags &= ~CFopsize;
}
// Try to replace TEST reg,-1 with TEST reg,reg
if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
{ if ((u & 0xFF) == 0xFF)
{
// L4: emit TEST regL,regL using the current value of ereg
L4: c->Iop = 0x84; // TEST regL,regL
c->Irm = modregrm(3,ereg,ereg);
if (c->Irex & REX_B)
c->Irex |= REX_R;
c->Iflags &= ~CFopsize;
goto L1;
}
}
if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
{ if (u == 0xFF)
goto L4;
if ((u & 0xFFFF) == 0xFF00 && shortop && !c->Irex && ereg < 4)
{ ereg |= 4; /* to regH */
goto L4;
}
}
/* Look for sign extended immediate data */
if ((signed char) u == u)
{
if (op == 0x81)
// reg values 0x08, 0x20 and 0x30 are excluded from the 0x83 short form
{ if (reg != 0x08 && reg != 0x20 && reg != 0x30)
c->Iop = op = 0x83; /* 8 bit sgn ext */
}
else if (op == 0x69) /* IMUL rw,ew,dw */
c->Iop = op = 0x6B; /* IMUL rw,ew,db */
}
// Look for SHIFT EA,imm8 we can replace with short form
if (u == 1 && ((op & 0xFE) == 0xC0))
c->Iop |= 0xD0;
} /* if immediate second operand */
/* Look for AX short form */
if (ins & A)
{ if (rm == modregrm(0,AX,local_BPRM) &&
!(c->Irex & REX_R) && // and it's AX, not R8
(op & ~3) == 0x88 &&
!I64)
{ op = ((op & 3) + 0xA0) ^ 2;
/* 8A-> A0 */
/* 8B-> A1 */
/* 88-> A2 */
/* 89-> A3 */
c->Iop = op;
// The A0..A3 forms have no modregrm byte, so the address is copied
// into the second operand slot
c->IFL2 = c->IFL1;
c->IEV2 = c->IEV1;
}
/* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */
else if (!I16 &&
(op == 0x89 || op == 0x8B) &&
(rm & 0xC0) == 0xC0 &&
(!b || b->BC != BCasm)
)
c->Iflags &= ~CFopsize;
// If rm is AX
else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c->Irex & (REX_R | REX_B)))
{ switch (op)
{ case 0x80: op = reg | 4; break;
case 0x81: op = reg | 5; break;
case 0x87: op = 0x90 + (reg>>3); break; // XCHG
case 0xF6:
if (reg == 0)
op = 0xA8; /* TEST AL,immed8 */
break;
case 0xF7:
if (reg == 0)
op = 0xA9; /* TEST AX,immed16 */
break;
}
c->Iop = op;
}
}
/* Look for reg short form */
if ((ins & R) && (rm & 0xC0) == 0xC0)
{ switch (op)
{ case 0xC6: op = 0xB0 + ereg; break;
case 0xC7: op = 0xB8 + ereg; break;
case 0xFF:
switch (reg)
{ case 6<<3: op = 0x50+ereg; break;/* PUSH*/
// The one byte INC/DEC forms are not used when I64
case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
}
break;
case 0x8F: op = 0x58 + ereg; break;
case 0x87:
if (reg == 0) op = 0x90 + ereg;
break;
}
c->Iop = op;
}
// Look to replace SHL reg,1 with ADD reg,reg
if ((op & ~1) == 0xD0 &&
(rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
config.target_cpu >= TARGET_80486)
{
// Keeping only bit 0 of the opcode yields 0x00/0x01, the ADD EA,reg forms
c->Iop &= 1;
c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3);
if (c->Irex & REX_B)
c->Irex |= REX_R;
if (!(c->Iflags & CFpsw) && !I16)
c->Iflags &= ~CFopsize;
goto L1;
}
/* Look for sign extended modregrm displacement, or 0
* displacement.
*/
if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp
c->IFL1 == FLconst) // and it's a constant
{
a = c->IEVpointer1;
// A zero displacement can be dropped (mod becomes 0) unless the base
// is BP, directly or through the SIB byte
if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp]
!(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP)
)
c->Irm &= 0x3F;
// Flipping both mod bits turns mod 2 (wide disp) into mod 1 (8 bit disp)
else if (!I16)
{
if ((targ_size_t)(targ_schar)a == a)
c->Irm ^= 0xC0; /* do 8 sx */
}
else if (((targ_size_t)(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
c->Irm ^= 0xC0; /* do 8 sx */
}
/* Look for LEA reg,[ireg], replace with MOV reg,ireg */
else if (op == 0x8D)
{ rm = c->Irm & 7;
mod = c->Irm & modregrm(3,0,0);
if (mod == 0)
{
if (!I16)
{
switch (rm)
{
// rm 4 and 5 are left alone; every other rm names a plain register
case 4:
case 5:
break;
default:
c->Irm |= modregrm(3,0,0);
c->Iop = 0x8B;
break;
}
}
else
{
// 16 bit addressing: only the rm values that map to SI, DI or BX are handled
switch (rm)
{
case 4: rm = modregrm(3,0,SI); goto L6;
case 5: rm = modregrm(3,0,DI); goto L6;
case 7: rm = modregrm(3,0,BX); goto L6;
L6: c->Irm = rm + reg;
c->Iop = 0x8B;
break;
}
}
}
/* replace LEA reg,0[BP] with MOV reg,BP */
else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
c->IFL1 == FLconst && c->IEVpointer1 == 0)
{ c->Iop = 0x8B; /* MOV reg,BP */
c->Irm = modregrm(3,0,BP) + reg;
}
}
// Replace [R13] with 0[R13]
if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5))
{
c->Irm |= modregrm(1,0,0);
c->IFL1 = FLconst;
c->IEVpointer1 = 0;
}
}
else if (!(c->Iflags & CFvex))
{
// Instructions without a modregrm byte: jumps and PUSH immediate
switch (op)
{
default:
if ((op & ~0x0F) != 0x70)
break;
// conditional jumps 0x70..0x7F fall through to be handled like JMP
case JMP:
switch (c->IFL2)
{ case FLcode:
// Jump to the very next instruction: delete it
if (c->IEV2.Vcode == code_next(c))
{ c->Iop = NOP;
continue;
}
break;
case FLblock:
// Last instruction jumping to the block that follows: delete it
if (!code_next(c) && c->IEV2.Vblock == bn)
{ c->Iop = NOP;
continue;
}
break;
case FLconst:
case FLfunc:
case FLextern:
break;
default:
#ifdef DEBUG
WRFL((enum FL)c->IFL2);
#endif
assert(0);
}
break;
case 0x68: // PUSH immed16
if (c->IFL2 == FLconst)
{
targ_long u = c->IEV2.Vuns;
if (I64 ||
((c->Iflags & CFopsize) ? I16 : I32))
{ // PUSH 32/64 bit operand
if (u == (signed char) u)
c->Iop = 0x6A; // PUSH immed8
}
else // PUSH 16 bit operand
{ if ((short)u == (signed char) u)
c->Iop = 0x6A; // PUSH immed8
}
}
break;
}
}
}
#if 0
if (1 || debugc) {
printf("-pinholeopt(%p)\n",cstart);
for (c = cstart; c; c = code_next(c))
c->print();
}
#endif
}
#ifdef DEBUG
/*******************************
 * Self test for pinholeopt().
 * Each table row is a pair: the instruction fed to pinholeopt() and the
 * instruction expected back.  A row whose input model is nonzero is only
 * run when that model matches the current one (I16/I32/I64).
 */
STATIC void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");
    struct CS { unsigned model,op,ea,ev1,ev2,flags; } tests[][2] =
    {
        // XOR reg,immed                        NOT regL
        {{ 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF }},
        // MOV 0[BX],3                          MOV [BX],3
        {{ 16,0xC7,modregrm(2,0,7),0,3}, { 0,0xC7,modregrm(0,0,7),0,3 }},
#if 0   // only if config.flags4 & CFG4space
        // TEST regL,immed8
        {{ 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }},
        {{ 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }},
        {{ 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }},
        {{ 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }},
#endif
        // PUSH immed => PUSH immed8
        {{ 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }},
        {{ 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }},
        {{ 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }},
        {{ 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }},
        {{ 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }},
        {{ 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }},
        {{ 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }},
        {{ 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }},
        {{ 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }},
        {{ 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }},
        {{ 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }},
        {{ 32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }},
        {{ 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }},
    };
    //config.flags4 |= CFG4space;

    const int ntests = sizeof(tests)/sizeof(tests[0]);
    for (int i = 0; i < ntests; i++)
    {
        CS *given = &tests[i][0];
        CS *expect = &tests[i][1];

        // Skip rows written for a different code model
        const unsigned model = given->model;
        if (model &&
            ((I16 && model != 16) ||
             (I32 && model != 32) ||
             (I64 && model != 64)))
            continue;

        //printf("[%d]\n", i);

        // Build a single zeroed instruction from the input half of the row
        code cs;
        memset(&cs, 0, sizeof(cs));
        cs.Iop = given->op;
        cs.Iea = given->ea;
        cs.IFL1 = FLconst;
        cs.IFL2 = FLconst;
        cs.IEV1.Vuns = given->ev1;
        cs.IEV2.Vuns = given->ev2;
        cs.Iflags = given->flags;

        pinholeopt(&cs, NULL);

        // Compare against the expected half, reporting opcode mismatches first
        if (cs.Iop != expect->op)
        {
            printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, expect->op);
            assert(0);
        }
        assert(cs.Iea == expect->ea);
        assert(cs.IEV1.Vuns == expect->ev1);
        assert(cs.IEV2.Vuns == expect->ev2);
        assert(cs.Iflags == expect->flags);
    }
}
#endif
/**************************
 * Compute jump addresses for FLcode.
 * Each FLcode jump operand is replaced by an FLconst displacement.
 * Note: only works for forward referenced code.
 *       only direct jumps and branches are detected.
 *       LOOP instructions only work for backward refs.
 * A jump whose target is not found ahead of it is resolved as a backward
 * reference in the same way as LOOP.
 * Input:
 *      c       start of code list
 */
void jmpaddr(code *c)
{
    //printf("jmpaddr()\n");
    code * const cstart = c;            /* remember start of code */

    for (; c; c = code_next(c))
    {
        const unsigned op = c->Iop;
        bool backward = false;          // forward search failed, resolve backwards

        if (op <= 0xEB &&
            (inssize[op] & T) &&        // if second operand
            c->IFL2 == FLcode &&
            ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
        {
            code *target = c->IEV2.Vcode;       /* target code */
            targ_size_t ad = 0;                 /* IP displacement */
            code *ci;

            // Sum the sizes of everything between the jump and its target
            for (ci = code_next(c); ci && ci != target; ci = code_next(ci))
                ad += calccodsize(ci);

            if (!ci)
                backward = true;                // couldn't find it
            else
            {
                const bool unconditional =
                    !I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL;

                if (unconditional || !(c->Iflags & CFjmp16))
                    c->IEVpointer2 = ad;
                else
                {
                    /* branch around a long jump: splice a fresh
                     * instruction in right after c
                     */
                    code *clong = code_calloc();
                    code_next(clong) = code_next(c);
                    code_next(c) = clong;

                    c->Iop = op ^ 1;            /* converse jmp */
                    c->Iflags &= ~CFjmp16;
                    c->IEVpointer2 = I16 ? 3 : 5;

                    clong->Iop = JMP;           /* long jump */
                    clong->IFL2 = FLconst;
                    clong->IEVpointer2 = ad;
                }
                c->IFL2 = FLconst;
            }
        }

        if (backward || (op == LOOP && c->IFL2 == FLcode))     /* backwards refs */
        {
            code *target = c->IEV2.Vcode;
            code *ci;

            // The target must appear in the list strictly before c
            for (ci = cstart; ci != target; ci = code_next(ci))
            {
                if (!ci || ci == c)
                    assert(0);
            }

            targ_size_t ad = 2;                 /* - IP displacement */
            for (; ci != c; ci = code_next(ci))
            {
                assert(ci);
                ad += calccodsize(ci);
            }
            c->IEVpointer2 = (-ad) & 0xFF;
            c->IFL2 = FLconst;
        }
    }
}
/*******************************
 * Calculate bl->Bsize.
 * Input:
 *      c       start of code list (may be NULL)
 * Returns:
 *      sum of calccodsize() over every instruction in the list
 */
unsigned calcblksize(code *c)
{
    unsigned total = 0;

    while (c)
    {
        //printf("off=%02x, sz = %d, code %p: op=%02x\n", total, calccodsize(c), c, c->Iop);
        total += calccodsize(c);
        c = code_next(c);
    }
    //printf("calcblksize(c = x%x) = %d\n", c, total);
    return total;
}
/*****************************
* Calculate and return code size of a code.
* Note that NOPs are sometimes used as markers, but are
* never output. LINNUMs are never output.
* Note: This routine must be fast. Profiling shows it is significant.
* Params:
*      c = instruction to measure; it is dereferenced unconditionally
* Returns:
*      size derived from the opcode size tables, the prefix flags, the
*      mod r/m addressing form and the REX byte; 0 for NOP and ESCAPE.
*      The DEBUG build of codout() asserts that this equals the number of
*      bytes actually emitted, so the two routines must be kept in step.
*/
unsigned calccodsize(code *c)
{ unsigned size;
unsigned op;
unsigned char rm,mod,ins;
unsigned iflags;
// Set for both I32 and I64: selects inssize32[] and 4 byte immediates below
unsigned i32 = I32 || I64;
// Addressing flavour used at Lmodrm; toggled by CFaddrsize further down
unsigned a32 = i32;
#ifdef DEBUG
assert((a32 & ~1) == 0);
#endif
iflags = c->Iflags;
op = c->Iop;
if (iflags & CFvex)
{
// VEX encoded: size information comes from vex_inssize() and the prefix
// accounting between here and Lmodrm is skipped
ins = vex_inssize(c);
size = ins & 7;
goto Lmodrm;
}
else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
op = 0x0F; // all 0F-- / 0F38-- / 0F3A-- opcodes dispatch through case 0x0F
else
op &= 0xFF;
switch (op)
{
case 0x0F:
if ((c->Iop & 0xFFFD00) == 0x0F3800)
{ // 3 byte op ( 0F38-- or 0F3A-- )
ins = inssize2[(c->Iop >> 8) & 0xFF];
size = ins & 7;
// one more byte when bits 24..31 of Iop are populated
if (c->Iop & 0xFF000000)
size++;
}
else
{ // 2 byte op ( 0F-- )
ins = inssize2[c->Iop & 0xFF];
size = ins & 7;
// one more byte when bits 16..23 of Iop are populated
if (c->Iop & 0xFF0000)
size++;
}
break;
case NOP:
case ESCAPE:
size = 0; // since these won't be output
goto Lret2;
case ASM:
if (c->Iflags == CFaddrsize) // kludge for DA inline asm
size = NPTRSIZE;
else
size = c->IEV1.as.len;
goto Lret2;
case 0xA1:
case 0xA3:
if (c->Irex)
{
size = 9; // 64 bit immediate value for MOV to/from RAX
goto Lret; // Lret adds the REX byte itself
}
goto Ldefault;
case 0xF6: /* TEST mem8,immed8 */
ins = inssize[op];
size = ins & 7;
if (i32)
size = inssize32[op];
// only the /0 form (reg field of mod r/m == 0) carries an immediate
if ((c->Irm & (7<<3)) == 0)
size++; /* size of immed8 */
break;
case 0xF7:
ins = inssize[op];
size = ins & 7;
if (i32)
size = inssize32[op];
// /0 form: immediate is 4 bytes, or 2 when CFopsize flips the default
if ((c->Irm & (7<<3)) == 0)
size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2;
break;
default:
Ldefault:
ins = inssize[op];
size = ins & 7;
if (i32)
size = inssize32[op];
}
// Prefix bytes and the size corrections they imply
if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
{
if (iflags & CFwait) // if add FWAIT prefix
size++;
if (iflags & CFSEG) // if segment override
size++;
// If the instruction has a second operand that is not an 8 bit,
// and the operand size prefix is present, then fix the size computation
// because the operand size will be different.
// Walter, I had problems with this bit at the end. There can still be
// an ADDRSIZE prefix for these and it does indeed change the operand size.
if (iflags & (CFopsize | CFaddrsize))
{
if ((ins & (T|E)) == T)
{
// opcodes 0xA0..0xA3: the operand is an address, so its size follows
// the address size prefix rather than the operand size prefix
if ((op & 0xAC) == 0xA0)
{
if (iflags & CFaddrsize && !I64)
{ if (I32)
size -= 2;
else
size += 2;
}
}
else if (iflags & CFopsize)
{ if (I16)
size += 2;
else
size -= 2;
}
}
if (iflags & CFaddrsize)
{ if (!I64)
a32 ^= 1;
size++; // +1 for the address size prefix byte
}
if (iflags & CFopsize)
size++; /* +1 for OPSIZE prefix */
}
}
Lmodrm:
if ((op & ~0x0F) == 0x70)
{ if (iflags & CFjmp16) // if long branch
size += I16 ? 3 : 4; // + 3(4) bytes for JMP
}
else if (ins & M) // if modregrm byte
{
rm = c->Irm;
mod = rm & 0xC0;
if (a32 || I64)
{ // 32 bit addressing
if (issib(rm))
size++; // SIB byte
switch (mod)
{ case 0:
if (issib(rm) && (c->Isib & 7) == 5 ||
(rm & 7) == 5)
size += 4; /* disp32 */
if (c->Irex & REX_B && (rm & 7) == 5)
/* Instead of selecting R13, this mode is an [RIP] relative
* address. Although valid, it's redundant, and should not
* be generated. Instead, generate 0[R13] instead of [R13].
*/
assert(0);
break;
case 0x40:
size++; /* disp8 */
break;
case 0x80:
size += 4; /* disp32 */
break;
}
}
else
{ // 16 bit addressing
if (mod == 0x40) /* 01: 8 bit displacement */
size++;
else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
size += 2; // 16 bit displacement
}
}
Lret:
if (!(iflags & CFvex) && c->Irex)
{ size++; // REX prefix byte
// opcodes 0xB8..0xBF with REX_W: codout() emits their immediate through
// do64bit(), so account for 4 more bytes
if (c->Irex & REX_W && (op & ~7) == 0xB8)
size += 4;
}
Lret2:
//printf("op = x%02x, size = %d\n",op,size);
return size;
}
/********************************
* Return !=0 if codes match.
* This routine is compiled out (#if 0).
* Two instructions match when they are the same object, or when opcode,
* flags, mod r/m (and SIB where present) and both operands compare equal.
* NOPs and non-ctor/dtor ESCAPEs with the same opcode always match;
* ESCctor/ESCdtor never match unless they are the same object.
* Params:
*      c1 = first instruction
*      c2 = second instruction
* Returns:
*      1 if they match, 0 if not
*/
#if 0
int code_match(code *c1,code *c2)
{   code cs1,cs2;
    unsigned char ins;
    unsigned char rm;

    if (c1 == c2)
        goto match;
    cs1 = *c1;
    cs2 = *c2;
    if (cs1.Iop != cs2.Iop)
        goto nomatch;
    switch (cs1.Iop)
    {
        case ESCAPE | ESCctor:
        case ESCAPE | ESCdtor:
            goto nomatch;
        case NOP:
            goto match;
        case ASM:
            // inline asm blobs match when they are byte-for-byte identical
            if (cs1.IEV1.as.len == cs2.IEV1.as.len &&
                memcmp(cs1.IEV1.as.bytes,cs2.IEV1.as.bytes,cs1.IEV1.as.len) == 0)
                goto match;
            else
                goto nomatch;
        default:
            if ((cs1.Iop & 0xFF) == ESCAPE)
                goto match;
            break;
    }
    if (cs1.Iflags != cs2.Iflags)
        goto nomatch;

    // Pick the size/operand descriptor for 1, 2 or 3 byte opcodes
    ins = inssize[cs1.Iop & 0xFF];
    if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
    {
        ins = inssize2[(cs1.Iop >> 8) & 0xFF];
    }
    else if ((cs1.Iop & 0xFF00) == 0x0F00)
    {
        ins = inssize2[cs1.Iop & 0xFF];
    }

    if (ins & M)                // if modregrm byte
    {
        if (cs1.Irm != cs2.Irm)
            goto nomatch;
        rm = cs1.Irm;           // equal to cs2.Irm from here on
        if ((rm & 0xC0) == 0xC0)
            goto do2;           // register operand, nothing more to compare
        if (is32bitaddr(I32,cs1.Iflags))
        {
            if (issib(rm) && cs1.Isib != cs2.Isib)
                goto nomatch;
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (cs1.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand */
        }
        if (cs1.IFL1 != cs2.IFL1)
            goto nomatch;
        if (flinsymtab[cs1.IFL1] && cs1.IEVsym1 != cs2.IEVsym1)
            goto nomatch;
        if (cs1.IEVoffset1 != cs2.IEVoffset1)
            goto nomatch;
    }

do2:
    if (!(ins & T))             // if no second operand
        goto match;
    if (cs1.IFL2 != cs2.IFL2)
        goto nomatch;
    if (flinsymtab[cs1.IFL2] && cs1.IEVsym2 != cs2.IEVsym2)
        goto nomatch;
    if (cs1.IEVoffset2 != cs2.IEVoffset2)
        goto nomatch;

match:
    return 1;

nomatch:
    return 0;
}
#endif
/**************************
* Write code to intermediate file.
* Code starts at offset.
* Returns:
* addr of end of code
*/
// State shared by codout(), cod3_flush() and the doNNbit() helpers.
static targ_size_t offset;      /* to save code use a global */
static char bytes[100];         // staging buffer for bytes not yet handed to obj_bytes()
static char *pgen;              // next free position in bytes[]

// Append one byte to the staging buffer
#define GEN(c)          (*pgen++ = (c))
// Append n bytes starting at p to the staging buffer
#define GENP(n,p)       (memcpy(pgen,(p),(n)), pgen += (n))
// Flush the staging buffer if it holds anything.
// A single statement-shaped definition is used for every host compiler: it is
// safe next to if/else and does not use a void call as an operand of &&.
#define FLUSH()         do { if (pgen != bytes) cod3_flush(); } while (0)
// Offset in the code segment of the next byte to be generated
#define OFFSET()        (offset + (pgen - bytes))
/* Hand the bytes staged in bytes[] to the object file writer, advance
 * offset by what obj_bytes() reports, and reset the staging buffer to empty.
 */
STATIC void cod3_flush()
{
    unsigned pending = (unsigned)(pgen - bytes);

#ifdef DEBUG
    assert(pending < sizeof(bytes));
#endif
    // Emit accumulated bytes to code segment
    offset += obj_bytes(cseg, offset, pending, bytes);
    pgen = bytes;
}
/**************************
* Emit the instruction list c into the code segment, starting at Coffset.
* Opcode, prefix and mod r/m bytes are staged in bytes[] with GEN();
* operands go through do8bit/do16bit/do32bit/do64bit, which either stage
* a constant or flush and emit a fixup.
* ESCAPE and NOP entries produce no bytes; ASM entries are copied verbatim.
* Params:
*      c = first instruction of the list, may be NULL
* Returns:
*      offset after the last byte written; the same value is stored in Coffset
*/
unsigned codout(code *c)
{ unsigned op;
unsigned char rm,mod;
unsigned char ins;
code *cn;
unsigned flags;
symbol *s;
#ifdef DEBUG
if (debugc) printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset);
#endif
pgen = bytes;
offset = Coffset;
for (; c; c = code_next(c))
{
#ifdef DEBUG
if (debugc) { printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c)); c->print(); }
unsigned startoffset = OFFSET();
#endif
op = c->Iop;
ins = inssize[op & 0xFF];
// Pseudo instructions are recognised by the low byte of the opcode
switch (op & 0xFF)
{ case ESCAPE:
/* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
// (its low byte collides with ESCAPE; break out to emit it normally)
if(op == 0x660F383E || c->Iflags & CFvex) break;
switch (op & 0xFFFF00)
{ case ESClinnum:
/* put out line number stuff */
objlinnum(c->IEV1.Vsrcpos,OFFSET());
break;
#if SCPP
#if 1
case ESCctor:
case ESCdtor:
case ESCoffset:
if (config.exe != EX_NT)
except_pair_setoffset(c,OFFSET() - funcoffset);
break;
case ESCmark:
case ESCrelease:
case ESCmark2:
case ESCrelease2:
break;
#else
case ESCctor:
except_push(OFFSET() - funcoffset,c->IEV1.Vtor,NULL);
break;
case ESCdtor:
except_pop(OFFSET() - funcoffset,c->IEV1.Vtor,NULL);
break;
case ESCmark:
except_mark();
break;
case ESCrelease:
except_release();
break;
#endif
#endif
}
#ifdef DEBUG
assert(calccodsize(c) == 0);
#endif
continue;
case NOP: /* don't send them out */
// a longer opcode whose low byte equals NOP is a real instruction
if (op != NOP)
break;
#ifdef DEBUG
assert(calccodsize(c) == 0);
#endif
continue;
case ASM:
// likewise, only the exact ASM opcode is the inline asm pseudo op
if (op != ASM)
break;
FLUSH();
if (c->Iflags == CFaddrsize) // kludge for DA inline asm
{
do32bit(FLblockoff,&c->IEV1,0);
}
else
{
// copy the inline asm bytes straight to the object file
offset += obj_bytes(cseg,offset,c->IEV1.as.len,c->IEV1.as.bytes);
}
#ifdef DEBUG
assert(calccodsize(c) == c->IEV1.as.len);
#endif
continue;
}
flags = c->Iflags;
// See if we need to flush (don't have room for largest code sequence)
if (pgen - bytes > sizeof(bytes) - (1+4+4+8+8))
FLUSH();
// see if we need to put out prefix bytes
if (flags & (CFwait | CFPREFIX | CFjmp16))
{ int override;
if (flags & CFwait)
GEN(0x9B); // FWAIT
/* ? SEGES : SEGSS */
switch (flags & CFSEG)
{ case CFes: override = SEGES; goto segover;
case CFss: override = SEGSS; goto segover;
case CFcs: override = SEGCS; goto segover;
case CFds: override = SEGDS; goto segover;
case CFfs: override = SEGFS; goto segover;
case CFgs: override = SEGGS; goto segover;
segover: GEN(override);
break;
}
if (flags & CFaddrsize)
GEN(0x67);
// Do this last because of instructions like ADDPD
if (flags & CFopsize)
GEN(0x66); /* operand size */
if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */
{
if (!I16)
{ // Put out 16 bit conditional jump
// (rewritten as the two byte 0F 8x form with the same condition bits)
c->Iop = op = 0x0F00 | (0x80 | (op & 0x0F));
}
else
{
// I16: invert the condition and hop over a JMP that is spliced
// in after c and inherits the original block target
cn = code_calloc();
/*cxcalloc++;*/
code_next(cn) = code_next(c);
code_next(c) = cn; // link into code
cn->Iop = JMP; // JMP block
cn->IFL2 = c->IFL2;
cn->IEV2.Vblock = c->IEV2.Vblock;
c->Iop = op ^= 1; // toggle condition
c->IFL2 = FLconst;
c->IEVpointer2 = I16 ? 3 : 5; // skip over JMP block
c->Iflags &= ~CFjmp16;
}
}
}
if (flags & CFvex)
{
// VEX encoded instruction: emit the 3 byte (C4) or 2 byte (C5) prefix
// followed by the opcode byte kept in Ivex.op
if (flags & CFvex3)
{
GEN(0xC4);
GEN(VEX3_B1(c->Ivex));
GEN(VEX3_B2(c->Ivex));
GEN(c->Ivex.op);
}
else
{
GEN(0xC5);
GEN(VEX2_B1(c->Ivex));
GEN(c->Ivex.op);
}
ins = vex_inssize(c);
goto Lmodrm;
}
if (op > 0xFF)
{
// Multi-byte opcode: up to 4 bytes are packed into op, most significant
// byte first
if ((op & 0xFFFD00) == 0x0F3800)
ins = inssize2[(op >> 8) & 0xFF];
else if ((op & 0xFF00) == 0x0F00)
ins = inssize2[op & 0xFF];
if (op & 0xFF000000)
{
unsigned char op1 = op >> 24;
// F2/F3/66 as leading byte must precede the REX byte;
// any other leading byte follows it
if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
{
GEN(op1);
if (c->Irex)
GEN(c->Irex | REX);
}
else
{
if (c->Irex)
GEN(c->Irex | REX);
GEN(op1);
}
GEN((op >> 16) & 0xFF);
GEN((op >> 8) & 0xFF);
GEN(op & 0xFF);
}
else if (op & 0xFF0000)
{
unsigned char op1 = op >> 16;
// same REX ordering rule as above
if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
{
GEN(op1);
if (c->Irex)
GEN(c->Irex | REX);
}
else
{
if (c->Irex)
GEN(c->Irex | REX);
GEN(op1);
}
GEN((op >> 8) & 0xFF);
GEN(op & 0xFF);
}
else
{
if (c->Irex)
GEN(c->Irex | REX);
GEN((op >> 8) & 0xFF);
GEN(op & 0xFF);
}
}
else
{
// single byte opcode, optionally preceded by REX
if (c->Irex)
GEN(c->Irex | REX);
GEN(op);
}
Lmodrm:
if (ins & M) /* if modregrm byte */
{
rm = c->Irm;
GEN(rm);
// Look for an address size override when working with the
// MOD R/M and SIB bytes
if (is32bitaddr( I32, flags))
{
if (issib(rm))
GEN(c->Isib);
switch (rm & 0xC0)
{ case 0x40:
do8bit((enum FL) c->IFL1,&c->IEV1); // 8 bit
break;
case 0:
// mod == 0 has a displacement only for the disp32 forms
if (!(issib(rm) && (c->Isib & 7) == 5 ||
(rm & 7) == 5))
break;
/* fall through to emit the disp32 */
case 0x80:
{ int flags = CFoff; // note: shadows the outer 'flags' inside this block
targ_size_t val = 0;
if (I64)
{
if ((rm & modregrm(3,0,7)) == modregrm(0,0,5)) // if disp32[RIP]
{ flags |= CFpc32;
// val is passed to do32bit() as an adjustment to the fixup:
// -4 for the displacement itself, more when immediate bytes follow it
val = -4;
unsigned reg = rm & modregrm(0,7,0);
if (ins & T ||
((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0))))
{ if (ins & E)
val = -5;
else if (c->Iflags & CFopsize)
val = -6;
else
val = -8;
}
#if TARGET_OSX
// Mach-O linkage already takes the 4 byte size into account
val += 4;
#endif
}
}
do32bit((enum FL)c->IFL1,&c->IEV1,flags,val);
break;
}
}
}
else
{
// 16 bit addressing forms
switch (rm & 0xC0)
{ case 0x40:
do8bit((enum FL) c->IFL1,&c->IEV1); // 8 bit
break;
case 0:
// mod == 0 has a displacement only when r/m == 6
if ((rm & 7) != 6)
break;
/* fall through to emit the disp16 */
case 0x80:
do16bit((enum FL)c->IFL1,&c->IEV1,CFoff);
break;
}
}
}
else
{
// opcode 0xC8 has no mod r/m byte but still carries a 16 bit first operand
if (op == 0xC8)
do16bit((enum FL)c->IFL1,&c->IEV1,0);
}
// Only these bits are passed on to the operand emitters below
flags &= CFseg | CFoff | CFselfrel;
if (ins & T) /* if second operand */
{ if (ins & E) /* if data-8 */
do8bit((enum FL) c->IFL2,&c->IEV2);
else if (!I16)
{
// 32/64 bit code generation: operand defaults to 4 bytes
switch (op)
{ case 0xC2: /* RETN imm16 */
case 0xCA: /* RETF imm16 */
do16:
do16bit((enum FL)c->IFL2,&c->IEV2,flags);
break;
case 0xA1:
case 0xA3:
if (I64 && c->Irex)
{
do64:
do64bit((enum FL)c->IFL2,&c->IEV2,flags);
break;
}
/* fall through */
case 0xA0: /* MOV AL,byte ptr [] */
case 0xA2:
if (c->Iflags & CFaddrsize && !I64)
goto do16;
else
do32:
do32bit((enum FL)c->IFL2,&c->IEV2,flags);
break;
case 0x9A:
case 0xEA:
// far pointer operand; the labels live in the I16 switch further down
if (c->Iflags & CFopsize)
goto ptr1616;
else
goto ptr1632;
case 0x68: // PUSH immed32
if ((enum FL)c->IFL2 == FLblock)
{
// pushing a block: emit its offset rather than a displacement
c->IFL2 = FLblockoff;
goto do32;
}
else
goto case_default;
case CALL: // CALL rel
case JMP: // JMP rel
flags |= CFselfrel;
goto case_default;
default:
if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32
flags |= CFselfrel;
if (I64 && (op & ~7) == 0xB8 && c->Irex & REX_W)
goto do64;
case_default:
if (c->Iflags & CFopsize)
goto do16;
else
goto do32;
break;
}
}
else
{
// 16 bit code generation: operand defaults to 2 bytes
switch (op) {
case 0xC2:
case 0xCA:
goto do16;
case 0xA0:
case 0xA1:
case 0xA2:
case 0xA3:
if (c->Iflags & CFaddrsize)
goto do32;
else
goto do16;
break;
case 0x9A:
case 0xEA:
if (c->Iflags & CFopsize)
goto ptr1632;
else
goto ptr1616;
// Both far pointer flavours share this code; it is also the target of
// the gotos in the !I16 switch above.
ptr1616:
ptr1632:
//assert(c->IFL2 == FLfunc);
FLUSH();
if (c->IFL2 == FLdatseg)
{
reftodatseg(cseg,offset,c->IEVpointer2,
c->IEVseg2,flags);
offset += 4;
}
else
{
s = c->IEVsym2;
// reftoident() reports how far to advance
offset += reftoident(cseg,offset,s,0,flags);
}
break;
case 0x68: // PUSH immed16
if ((enum FL)c->IFL2 == FLblock)
{ c->IFL2 = FLblockoff;
goto do16;
}
else
goto case_default16;
case CALL:
case JMP:
flags |= CFselfrel;
/* fall through */
default:
case_default16:
if (c->Iflags & CFopsize)
goto do32;
else
goto do16;
break;
}
}
}
else if (op == 0xF6) /* TEST mem8,immed8 */
{ if ((rm & (7<<3)) == 0)
do8bit((enum FL)c->IFL2,&c->IEV2);
}
else if (op == 0xF7)
{ if ((rm & (7<<3)) == 0) /* TEST mem16/32,immed16/32 */
{
// 4 byte immediate unless CFopsize flips the default operand size
if ((I32 || I64) ^ ((c->Iflags & CFopsize) != 0))
do32bit((enum FL)c->IFL2,&c->IEV2,flags);
else
do16bit((enum FL)c->IFL2,&c->IEV2,flags);
}
}
#ifdef DEBUG
// Cross-check: the bytes generated must agree with calccodsize()
if (OFFSET() - startoffset != calccodsize(c))
{
printf("actual: %d, calc: %d\n", (int)(OFFSET() - startoffset), (int)calccodsize(c));
c->print();
assert(0);
}
#endif
}
FLUSH();
Coffset = offset;
//printf("-codout(), Coffset = x%x\n", Coffset);
return offset; /* ending address */
}
/**************************
* Emit an 8 byte operand for the instruction codout() is generating.
* Constants and computed displacements are staged with GENP(); operands
* that need a relocation flush the staging buffer first, record the fixup
* at the current value of offset, and then advance offset by 8.
* Only valid when generating 64 bit code (asserts I64).
* Params:
*      fl    = operand kind, selects how uev is interpreted
*      uev   = operand value
*      flags = CF flags; passed on, with CFoffset64 added, for data segment,
*              symbol and function references
*/
STATIC void do64bit(enum FL fl,union evc *uev,int flags)
{
    symbol *s;
    targ_size_t ad;

    assert(I64);
    switch (fl)
    {
        case FLconst:
            ad = * (targ_size_t *) uev;
        L1:
            // Staged values return directly: OFFSET() already accounts for
            // bytes in the staging buffer, so offset must not be bumped here.
            GENP(8,&ad);
            return;

        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,CFoffset64 | flags);
            break;

        case FLframehandler:
            // remember where the placeholder lives; a zero is emitted for now
            framehandleroffset = OFFSET();
            ad = 0;
            goto L1;

        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            // NOTE(review): neither call here passes CFoffset64 although
            // offset is advanced by 8 below - confirm this is intended.
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;

#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#if DEBUG
            symbol_print(uev->sp.Vsym);
#endif
#endif
        // NOTE: In ELFOBJ all symbol refs have been tagged FLextern.
        //      Strings and statics are treated like offsets from an
        //      unnamed external which is the start of .rodata or .data
        case FLextern:                  /* external data symbol         */
        case FLtlsdata:
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
        case FLgot:
        case FLgotoff:
#endif
            FLUSH();
            s = uev->sp.Vsym;           /* symbol pointer               */
            reftoident(cseg,offset,s,uev->sp.Voffset,CFoffset64 | flags);
            break;

#if TARGET_OSX
        case FLgot:
            funcsym_p->Slocalgotoffset = OFFSET();
            ad = 0;
            goto L1;
#endif

        case FLfunc:                    /* function call                */
            s = uev->sp.Vsym;           /* symbol pointer               */
            assert(TARGET_SEGMENTED || !tyfarfunc(s->ty()));
            FLUSH();
            reftoident(cseg,offset,s,0,CFoffset64 | flags);
            break;

        case FLblock:                   /* displacement to another block */
            // NOTE(review): subtracts 4 even though 8 bytes are emitted;
            // matches do32bit() - confirm this path is correct for 64 bits.
            ad = uev->Vblock->Boffset - OFFSET() - 4;
            //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad);
            goto L1;

        case FLblockoff:
            FLUSH();
            assert(uev->Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset);
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 8;
}
/**************************
* Emit a 4 byte operand for the instruction codout() is generating.
* Constants and computed displacements are staged with GENP(); operands
* that need a relocation flush the staging buffer first, record the fixup
* at the current value of offset, and then advance offset by 4.
* Params:
*      fl    = operand kind, selects how uev is interpreted
*      uev   = operand value
*      flags = CF flags passed on to the fixup routines
*      val   = adjustment added to the symbol offset for FLextern/FLtlsdata
*              (and the FLgot/FLgotoff group) and passed as the offset for
*              near FLfunc references; codout() omits it in some calls, so
*              a default is presumably supplied by the prototype
*/
STATIC void do32bit(enum FL fl,union evc *uev,int flags, targ_size_t val)
{
    symbol *s;
    targ_size_t ad;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(sizeof(targ_size_t) == 4 || sizeof(targ_size_t) == 8);
            ad = * (targ_size_t *) uev;
        L1:
            // Staged values return directly: OFFSET() already accounts for
            // bytes in the staging buffer, so offset must not be bumped here.
            GENP(4,&ad);
            return;

        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,flags);
            break;

        case FLframehandler:
            // remember where the placeholder lives; a zero is emitted for now
            framehandleroffset = OFFSET();
            ad = 0;
            goto L1;

        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;

#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#if DEBUG
            symbol_print(uev->sp.Vsym);
#endif
#endif
        // NOTE: In ELFOBJ all symbol refs have been tagged FLextern.
        //      Strings and statics are treated like offsets from an
        //      unnamed external which is the start of .rodata or .data
        case FLextern:                  /* external data symbol         */
        case FLtlsdata:
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
        case FLgot:
        case FLgotoff:
#endif
            FLUSH();
            s = uev->sp.Vsym;           /* symbol pointer               */
            reftoident(cseg,offset,s,uev->sp.Voffset + val,flags);
            break;

#if TARGET_OSX
        case FLgot:
            funcsym_p->Slocalgotoffset = OFFSET();
            ad = 0;
            goto L1;
#endif

        case FLfunc:                    /* function call                */
            s = uev->sp.Vsym;           /* symbol pointer               */
#if TARGET_SEGMENTED
            if (tyfarfunc(s->ty()))
            {   /* Large code references are always absolute    */
                FLUSH();
                // take the advance from reftoident(); the -4 cancels the
                // unconditional += 4 at the bottom
                offset += reftoident(cseg,offset,s,0,flags) - 4;
            }
            else if (s->Sseg == cseg &&
                     (s->Sclass == SCstatic || s->Sclass == SCglobal) &&
                     s->Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                ad = s->Soffset - OFFSET() - 4;
                goto L1;
            }
            else
#endif
            {
                assert(TARGET_SEGMENTED || !tyfarfunc(s->ty()));
                FLUSH();
                reftoident(cseg,offset,s,val,flags);
            }
            break;

        case FLblock:                   /* displacement to another block */
            // relative to the end of this 4 byte operand
            ad = uev->Vblock->Boffset - OFFSET() - 4;
            //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad);
            goto L1;

        case FLblockoff:
            FLUSH();
            assert(uev->Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset);
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 4;
}
/**************************
* Emit a 2 byte operand for the instruction codout() is generating.
* Constants and computed displacements are staged with GENP(); operands
* that need a relocation flush the staging buffer first, record the fixup
* at the current value of offset, and then advance offset by 2.
* Params:
*      fl    = operand kind, selects how uev is interpreted
*      uev   = operand value
*      flags = CF flags passed on to the fixup routines
*/
STATIC void do16bit(enum FL fl,union evc *uev,int flags)
{
    symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            // the first two bytes of the operand union are the value
            GENP(2,(char *) uev);
            return;

        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,flags);
            break;

        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;

#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#endif
        case FLextern:                  /* external data symbol         */
        case FLtlsdata:
            assert(SIXTEENBIT || TARGET_SEGMENTED);
            FLUSH();
            s = uev->sp.Vsym;           /* symbol pointer               */
            reftoident(cseg,offset,s,uev->sp.Voffset,flags);
            break;

        case FLfunc:                    /* function call                */
            assert(SIXTEENBIT || TARGET_SEGMENTED);
            s = uev->sp.Vsym;           /* symbol pointer               */
            if (tyfarfunc(s->ty()))
            {   /* Large code references are always absolute    */
                FLUSH();
                // take the advance from reftoident(); the -2 cancels the
                // unconditional += 2 at the bottom
                offset += reftoident(cseg,offset,s,0,flags) - 2;
            }
            else if (s->Sseg == cseg &&
                     (s->Sclass == SCstatic || s->Sclass == SCglobal) &&
                     s->Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                ad = s->Soffset - OFFSET() - 2;
                goto L1;
            }
            else
            {   FLUSH();
                reftoident(cseg,offset,s,0,flags);
            }
            break;

        case FLblock:                   /* displacement to another block */
            // relative to the end of this 2 byte operand
            ad = uev->Vblock->Boffset - OFFSET() - 2;
#ifdef DEBUG
            {
                // the displacement has to fit in 16 signed bits
                targ_ptrdiff_t delta = uev->Vblock->Boffset - OFFSET() - 2;
                assert((signed short)delta == delta);
            }
#endif
        L1:
            GENP(2,&ad);                // displacement
            return;

        case FLblockoff:
            FLUSH();
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 2;
}
/**************************
* Emit a 1 byte operand: either a constant or the displacement to a block.
* A block displacement that does not fit in a signed byte is reported on
* stderr and terminates via err_exit().
* Params:
*      fl  = FLconst or FLblock; anything else is an internal error
*      uev = operand value
*/
STATIC void do8bit(enum FL fl,union evc *uev)
{
    char value;

    if (fl == FLconst)
    {
        value = uev->Vuns;
    }
    else if (fl == FLblock)
    {
        // distance from the end of this byte to the start of the target block
        targ_ptrdiff_t disp = uev->Vblock->Boffset - OFFSET() - 1;

        if ((signed char)disp != disp)
        {
#if MARS
            if (uev->Vblock->Bsrcpos.Slinnum)
                fprintf(stderr, "%s(%d): ", uev->Vblock->Bsrcpos.Sfilename, uev->Vblock->Bsrcpos.Slinnum);
#endif
            fprintf(stderr, "block displacement of %lld exceeds the maximum offset of -128 to 127.\n", (long long)disp);
            err_exit();
        }
        value = disp;
#ifdef DEBUG
        assert(uev->Vblock->Boffset > OFFSET() || value != 0x7F);
#endif
    }
    else
    {
#ifdef DEBUG
        fprintf(stderr,"fl = %d\n",fl);
#endif
        assert(0);
    }
    GEN(value);
}
/**********************************
* Hydrate a list of instructions (only built when HYDRATE is set).
* For every element: hydrate the link that leads to it, then hydrate
* whatever its first and second operands refer to, as selected by
* IFL1 / IFL2.
* NOTE(review): the exact meaning of "hydrate" is defined by ph_hydrate()
* and friends, which are outside this file section.
* Params:
*      pc = address of the pointer to the first instruction; must not be NULL
*/
#if HYDRATE
void code_hydrate(code **pc)
{
code *c;
unsigned char ins,rm;
enum FL fl;
assert(pc);
while (*pc)
{
c = (code *) ph_hydrate(pc);
// Operand descriptor for VEX, 3 byte, 2 byte or 1 byte opcodes
if (c->Iflags & CFvex)
ins = vex_inssize(c);
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
ins = inssize2[(c->Iop >> 8) & 0xFF];
else if ((c->Iop & 0xFF00) == 0x0F00)
ins = inssize2[c->Iop & 0xFF];
else
ins = inssize[c->Iop & 0xFF];
// Pseudo instructions keep their payload in IEV1 and have no operands
switch (c->Iop)
{
default:
break;
case ESCAPE | ESClinnum:
srcpos_hydrate(&c->IEV1.Vsrcpos);
goto done;
case ESCAPE | ESCctor:
case ESCAPE | ESCdtor:
el_hydrate(&c->IEV1.Vtor);
goto done;
case ASM:
ph_hydrate(&c->IEV1.as.bytes);
goto done;
}
// Skip the first operand when there is no mod r/m byte, when it names a
// register (mod == 3), or when the addressing form has no displacement
if (!(ins & M) ||
((rm = c->Irm) & 0xC0) == 0xC0)
goto do2; /* if no first operand */
if (is32bitaddr(I32,c->Iflags))
{
if (
((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5))
)
goto do2; /* if no first operand */
}
else
{
if (
((rm & 0xC0) == 0 && !((rm & 7) == 6))
)
goto do2; /* if no first operand */
}
fl = (enum FL) c->IFL1;
switch (fl)
{
// operand kinds that refer to a symbol
case FLudata:
case FLdata:
case FLreg:
case FLauto:
case FLbprel:
case FLpara:
#if TARGET_SEGMENTED
case FLcsdata:
case FLfardata:
#endif
case FLtlsdata:
case FLfunc:
case FLpseudo:
case FLextern:
case FLtmp:
assert(flinsymtab[fl]);
symbol_hydrate(&c->IEVsym1);
symbol_debug(c->IEVsym1);
break;
// operand kinds that hold no pointer: nothing to do
case FLdatseg:
case FLfltreg:
case FLallocatmp:
case FLcs:
case FLndp:
case FLoffset:
case FLlocalsize:
case FLconst:
case FLframehandler:
assert(!flinsymtab[fl]);
break;
case FLcode:
(void) ph_hydrate(&c->IEV1.Vcode);
break;
case FLblock:
case FLblockoff:
(void) ph_hydrate(&c->IEV1.Vblock);
break;
#if SCPP
case FLctor:
case FLdtor:
el_hydrate(&c->IEV1.Vtor);
break;
#endif
case FLasm:
(void) ph_hydrate(&c->IEV1.as.bytes);
break;
default:
#ifdef DEBUG
WRFL(fl);
#endif
assert(0);
break;
}
do2:
/* Ignore TEST (F6 and F7) opcodes */
if (!(ins & T))
goto done; /* if no second operand */
fl = (enum FL) c->IFL2;
switch (fl)
{
// operand kinds that refer to a symbol
case FLudata:
case FLdata:
case FLreg:
case FLauto:
case FLbprel:
case FLpara:
#if TARGET_SEGMENTED
case FLcsdata:
case FLfardata:
#endif
case FLtlsdata:
case FLfunc:
case FLpseudo:
case FLextern:
case FLtmp:
assert(flinsymtab[fl]);
symbol_hydrate(&c->IEVsym2);
symbol_debug(c->IEVsym2);
break;
// operand kinds that hold no pointer: nothing to do
case FLdatseg:
case FLfltreg:
case FLallocatmp:
case FLcs:
case FLndp:
case FLoffset:
case FLlocalsize:
case FLconst:
case FLframehandler:
assert(!flinsymtab[fl]);
break;
case FLcode:
(void) ph_hydrate(&c->IEV2.Vcode);
break;
case FLblock:
case FLblockoff:
(void) ph_hydrate(&c->IEV2.Vblock);
break;
default:
#ifdef DEBUG
WRFL(fl);
#endif
assert(0);
break;
}
done:
;
// continue with the link stored in this instruction
pc = &code_next(c);
}
}
#endif
/**********************************
* Dehydrate a list of instructions (only built when DEHYDRATE is set).
* Mirror image of code_hydrate(): for every element, dehydrate the link
* that leads to it, then whatever its first and second operands refer to,
* as selected by IFL1 / IFL2.
* NOTE(review): the exact meaning of "dehydrate" is defined by
* ph_dehydrate() and friends, which are outside this file section.
* Params:
*      pc = address of the pointer to the first instruction
*/
#if DEHYDRATE
void code_dehydrate(code **pc)
{
code *c;
unsigned char ins,rm;
enum FL fl;
// c is fetched before the link is dehydrated so the element stays reachable
while ((c = *pc) != NULL)
{
ph_dehydrate(pc);
// Operand descriptor for VEX, 3 byte, 2 byte or 1 byte opcodes
if (c->Iflags & CFvex)
ins = vex_inssize(c);
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
ins = inssize2[(c->Iop >> 8) & 0xFF];
else if ((c->Iop & 0xFF00) == 0x0F00)
ins = inssize2[c->Iop & 0xFF];
else
ins = inssize[c->Iop & 0xFF];
// Pseudo instructions keep their payload in IEV1 and have no operands
switch (c->Iop)
{
default:
break;
case ESCAPE | ESClinnum:
srcpos_dehydrate(&c->IEV1.Vsrcpos);
goto done;
case ESCAPE | ESCctor:
case ESCAPE | ESCdtor:
el_dehydrate(&c->IEV1.Vtor);
goto done;
case ASM:
ph_dehydrate(&c->IEV1.as.bytes);
goto done;
}
// Skip the first operand when there is no mod r/m byte, when it names a
// register (mod == 3), or when the addressing form has no displacement
if (!(ins & M) ||
((rm = c->Irm) & 0xC0) == 0xC0)
goto do2; /* if no first operand */
if (is32bitaddr(I32,c->Iflags))
{
if (
((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5))
)
goto do2; /* if no first operand */
}
else
{
if (
((rm & 0xC0) == 0 && !((rm & 7) == 6))
)
goto do2; /* if no first operand */
}
fl = (enum FL) c->IFL1;
switch (fl)
{
// operand kinds that refer to a symbol
case FLudata:
case FLdata:
case FLreg:
case FLauto:
case FLbprel:
case FLpara:
#if TARGET_SEGMENTED
case FLcsdata:
case FLfardata:
#endif
case FLtlsdata:
case FLfunc:
case FLpseudo:
case FLextern:
case FLtmp:
assert(flinsymtab[fl]);
symbol_dehydrate(&c->IEVsym1);
break;
// operand kinds that hold no pointer: nothing to do
case FLdatseg:
case FLfltreg:
case FLallocatmp:
case FLcs:
case FLndp:
case FLoffset:
case FLlocalsize:
case FLconst:
case FLframehandler:
assert(!flinsymtab[fl]);
break;
case FLcode:
ph_dehydrate(&c->IEV1.Vcode);
break;
case FLblock:
case FLblockoff:
ph_dehydrate(&c->IEV1.Vblock);
break;
#if SCPP
case FLctor:
case FLdtor:
el_dehydrate(&c->IEV1.Vtor);
break;
#endif
case FLasm:
ph_dehydrate(&c->IEV1.as.bytes);
break;
default:
#ifdef DEBUG
WRFL(fl);
#endif
assert(0);
break;
}
do2:
/* Ignore TEST (F6 and F7) opcodes */
if (!(ins & T))
goto done; /* if no second operand */
fl = (enum FL) c->IFL2;
switch (fl)
{
// operand kinds that refer to a symbol
case FLudata:
case FLdata:
case FLreg:
case FLauto:
case FLbprel:
case FLpara:
#if TARGET_SEGMENTED
case FLcsdata:
case FLfardata:
#endif
case FLtlsdata:
case FLfunc:
case FLpseudo:
case FLextern:
case FLtmp:
assert(flinsymtab[fl]);
symbol_dehydrate(&c->IEVsym2);
break;
// operand kinds that hold no pointer: nothing to do
case FLdatseg:
case FLfltreg:
case FLallocatmp:
case FLcs:
case FLndp:
case FLoffset:
case FLlocalsize:
case FLconst:
case FLframehandler:
assert(!flinsymtab[fl]);
break;
case FLcode:
ph_dehydrate(&c->IEV2.Vcode);
break;
case FLblock:
case FLblockoff:
ph_dehydrate(&c->IEV2.Vblock);
break;
default:
#ifdef DEBUG
WRFL(fl);
#endif
assert(0);
break;
}
done:
;
// continue with the link stored in this instruction
pc = &code_next(c);
}
}
#endif
/***************************
* Debug code to dump code structure.
*/
#if DEBUG
/* Print every instruction of the list starting at c, one per print() call. */
void WRcodlst(code *c)
{
    while (c)
    {
        c->print();
        c = code_next(c);
    }
}
/* Print a one line, human readable description of this instruction to
 * stdout: its address and link, VEX or REX prefix information, opcode,
 * flags, mod r/m and SIB bytes, and both operands where present.
 * A line number ESCAPE prints only its line number.
 */
void code::print()
{
    unsigned char ins;
    unsigned char rexb;
    code *c = this;

    if (c == CNIL)
    {   printf("code 0\n");
        return;
    }

    unsigned op = c->Iop;

    // Operand descriptor for VEX, 3 byte, 2 byte or 1 byte opcodes
    if (c->Iflags & CFvex)
        ins = vex_inssize(c);
    else if ((c->Iop & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(op >> 8) & 0xFF];
    else if ((c->Iop & 0xFF00) == 0x0F00)
        ins = inssize2[op & 0xFF];
    else
        ins = inssize[op & 0xFF];

    printf("code %p: nxt=%p ",c,code_next(c));

    if (c->Iflags & CFvex)
    {
        // Show the raw VEX bytes and rebuild the equivalent REX bits from
        // the VEX fields (r, x and b are tested negated; w is not)
        if (c->Iflags & CFvex3)
        {   printf("vex=0xC4");
            printf(" 0x%02X", VEX3_B1(c->Ivex));
            printf(" 0x%02X", VEX3_B2(c->Ivex));
            rexb =
                ( c->Ivex.w ? REX_W : 0) |
                (!c->Ivex.r ? REX_R : 0) |
                (!c->Ivex.x ? REX_X : 0) |
                (!c->Ivex.b ? REX_B : 0);
        }
        else
        {   printf("vex=0xC5");
            printf(" 0x%02X", VEX2_B1(c->Ivex));
            rexb = !c->Ivex.r ? REX_R : 0;
        }
        printf(" ");
    }
    else
        rexb = c->Irex;

    if (rexb)
    {
        // Print the value that is decoded below, so the hex number and the
        // W/R/X/B letters always agree (for non-VEX code rexb is c->Irex).
        printf("rex=0x%02X ", rexb);
        if (rexb & REX_W)
            printf("W");
        if (rexb & REX_R)
            printf("R");
        if (rexb & REX_X)
            printf("X");
        if (rexb & REX_B)
            printf("B");
        printf(" ");
    }

    printf("op=0x%02X",op);

    if ((op & 0xFF) == ESCAPE)
    {   if ((op & 0xFF00) == ESClinnum)
        {   printf(" linnum = %d\n",c->IEV1.Vsrcpos.Slinnum);
            return;
        }
        printf(" ESCAPE %d",c->Iop >> 8);
    }
    if (c->Iflags)
        printf(" flg=%x",c->Iflags);

    if (ins & M)
    {   unsigned rm = c->Irm;

        // mod r/m shown as raw value and as its mod, reg, r/m fields
        printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
        if (!I16 && issib(rm))
        {   unsigned char sib = c->Isib;
            printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
        }

        // First operand is shown only for BPRM and the disp8/disp32 (disp16) forms
        if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
        {
            switch (c->IFL1)
            {
                case FLconst:
                case FLoffset:
                    printf(" int = %4d",c->IEV1.Vuns);
                    break;
                case FLblock:
                    printf(" block = %p",c->IEV1.Vblock);
                    break;
                case FLswitch:
                case FLblockoff:
                case FLlocalsize:
                case FLframehandler:
                case 0:
                    break;
                case FLdatseg:
                    printf(" %d.%llx",c->IEVseg1,(unsigned long long)c->IEVpointer1);
                    break;
                case FLauto:
                case FLreg:
                case FLdata:
                case FLudata:
                case FLpara:
                case FLtmp:
                case FLbprel:
                case FLtlsdata:
                    printf(" sym='%s'",c->IEVsym1->Sident);
                    break;
                case FLextern:
                    printf(" FLextern offset = %4d",(int)c->IEVoffset1);
                    break;
                default:
                    WRFL((enum FL)c->IFL1);
                    break;
            }
        }
    }

    if (ins & T)
    {   printf(" "); WRFL((enum FL)c->IFL2);
        switch (c->IFL2)
        {
            case FLconst:
                printf(" int = %4d",c->IEV2.Vuns);
                break;
            case FLblock:
                printf(" block = %p",c->IEV2.Vblock);
                break;
            case FLswitch:
            case FLblockoff:
            case 0:
            case FLlocalsize:
            case FLframehandler:
                break;
            case FLdatseg:
                printf(" %d.%llx",c->IEVseg2,(unsigned long long)c->IEVpointer2);
                break;
            case FLauto:
            case FLreg:
            case FLpara:
            case FLtmp:
            case FLbprel:
            case FLfunc:
            case FLdata:
            case FLudata:
            case FLtlsdata:
                printf(" sym='%s'",c->IEVsym2->Sident);
                break;
            case FLcode:
                printf(" code = %p",c->IEV2.Vcode);
                break;
            default:
                WRFL((enum FL)c->IFL2);
                break;
        }
    }
    printf("\n");
}
#endif
#endif // !SPP