mirror of
https://github.com/xomboverlord/ldc.git
synced 2026-01-12 02:43:14 +01:00
3443f38 Fix issue 7493 Initialization of void[][N]
0b371da foreach can run semantic again
7216e2a fix Issue 7735 - Functions with variadic void[][]... arguments corrupt passed data
4fb2b2a Merge pull request #850 from 9rnsr/fix7773
9c59931 Merge pull request #851 from donc/ctfe7785pointerToVar
407f7e4 Merge pull request #852 from donc/segfault7639
9370f83 Fix issue 7380 Crash trying to use address of variable in struct constructor at module level
240866b Fix issue 7639 Undefined enum AA key crashes compiler
19b7096 Fix issue 7785 [CTFE] ICE when slicing pointer to variable
d9b11f6 fix Issue 7773 - UCFS syntax on built-in attributes too?
296d812 Merge pull request #846 from donc/ctfe7781segfault
65aca2d Merge pull request #848 from donc/regression7751
5576737 Merge pull request #849 from donc/bug7794
0310838 Merge pull request #828 from 9rnsr/fix7751
4027e4f Fix issue 7794 Sea of errors when calling regex() after compile error
59cc12d Fix issue 7781 [CTFE] Segmentation fault on 'mixin({return;}());'
3430947 fix seg fault in fail91.d
948274e Merge pull request #824 from donc/regression7745
22ac4b1 Merge pull request #826 from 9rnsr/fix6659
1c15841 Merge pull request #823 from redstar/mscclean
5f54752 Merge pull request #827 from 9rnsr/fix7694
399e4a3 Merge pull request #844 from donc/regression7782
516f49b Fix issue 7789 [CTFE] null pointer exception on setting array length
d74b354 Fix issue 7782 Regression: ICE with wrong import syntax
0269194 Fix issue 7751 [ICE] (Regression 2.059head) From auto and forward reference
42ad236 Merge pull request #830 from 9rnsr/fix_ufcs
67bf025 Merge pull request #832 from 9rnsr/fix7608
d13f107 Merge pull request #829 from 9rnsr/fix7754
e25cbe2 Merge pull request #834 from 9rnsr/fix2367
7fac235 merge D2 pull #842
c836773 Merge pull request #836 from 9rnsr/fix7757
a2754c5 Merge pull request #839 from 9rnsr/fix7768
4948836 fix Issue 7694 - Internal error: e2ir.c 1251 when calling member function inside struct via alias param
9f23335 Merge pull request #838 from 9rnsr/fix7621
92eba60 Merge pull request #840 from 9rnsr/fix7769
8fae3c2 fix issue 7742 - 'More initializers than fields' error with correct number of fields
6c2d706 to enum
35e4f08 fix Issue 7769 - relax inout rule doesn't work for template function
96a0105 fix Issue 7768 - More readable template error messages
8012d58 Merge pull request #831 from 9rnsr/fix7743
9c0cbdd fix Issue 7621 - Immutable type equivalence problem
f67f313 Merge pull request #833 from 9rnsr/fix7731
29754dd Merge pull request #837 from braddr/cleanup-backend2
374109a restore original binary() function and re-fix the new version
78c04aa fix Issue 7757 - Inout function with lazy inout parameter doesn't compile
50c34e9 fix Issue 7754 - static this() in template is stripped during header gen
11acdff Fix auto tester breaking.
f0b7157 fix Issue 7755 - regression(2.059head): ICE in glue.c
cfceb77 fix Issue 7751 - [ICE] From auto and forward reference
7a86807 fix Issue 2367 - Overloading error with string literals
6039c40 fix Issue 7731 - Assertion failure: 't' on line 7911 in file 'mtype.c'
aea3a39 fix Issue 7608 - __traits(allMembers) is broken
f46f07a fix Issue 7743 - Parsing problem with nothrow delegate
fa9d29f Revert "Revert "Refactor for UFCS property getter/setter resolution.""
d9698d8 Revert "Revert "fix Issue 7722 - Refuse normal functions to be used as properties""
0fbc772 Revert "Revert "Allow property function has two arguments""
07a3b09 fix Issue 6659 - Destructor in range foreach called after initialization
e499d4d Fix issue 7745 Regression(2.059beta) Methods defined in external object files when a pointer to it is taken
79a74e1 Fixes an unknown pragma warning.
2b12052 Fix issue 176 [module] message "module and package have the same name"
90e89a4 Merge pull request #814 from 9rnsr/fix7713
3ab0e79 Merge pull request #818 from donc/assoc7732
b3360e9 Fix issue 7732 [CTFE] wrong code for a struct called AssociativeArray
05f0b08 Merge pull request #779 from 9rnsr/fix7534
867e567 Revert "Allow property function has two arguments"
9171aeb Revert "fix Issue 7722 - Refuse normal functions to be used as properties"
989ced7 Revert "Refactor for UFCS property getter/setter resolution."
e9b5292 Refactor for UFCS property getter/setter resolution.
761d000 fix Issue 7722 - Refuse normal functions to be used as properties
9f5956b Allow property function has two arguments
1a11862 Revert "Allow property function has two arguments"
32f57e5 Revert "fix Issue 7722 - Refuse normal functions to be used as properties"
6489bb4 Revert "Refactor for UFCS property getter/setter resolution."
214296f Merge pull request #817 from 9rnsr/fix_ufcs
c3c7f2a Merge pull request #816 from donc/voidctfe6438
185d031 Refactor for UFCS property getter/setter resolution.
08bf89d fix Issue 7722 - Refuse normal functions to be used as properties
f0e3433 Allow property function has two arguments
1b67ac9 Direct check by Type::reliesOnTident
a3cd7d9 fix Issue 7713 - lambda inference doesn't work on template function argument
1762112 Fix issue 6438 - [CTFE] wrong error "value used before set" when slicing =void array
ace1eca fix complex constant folding
76f9b22 Consider return type covariance.
f700dbc fix Issue 7534 - Allow attribute-overloading of an overridden method
cba8f5c Merge pull request #763 from 9rnsr/fix7578
392d93f Merge pull request #815 from dawgfoto/fixSegFault
e48aba2 merge part of pull #769
d72a17e revert dd5a543
24d860b error(Loc loc,) doesn't abort program
4c79117 Use correct opcodes for moving cfloat from st->xmm and xmm->st
af875ff Merge pull request #785 from braddr/cleanup-backend2
9d3021a remove debugging printfs
b3df5ee Merge pull request #807 from dawgfoto/fix7698
f005537 Merge pull request #802 from dawgfoto/fixVC
65a145d Merge pull request #803 from donc/ctfeunion6681yebblies
1cf39ca Merge pull request #812 from 9rnsr/fix_ufcs
d846c3c Merge pull request #808 from 9rnsr/fix7702
fd0a492 fix Issue 7670 - UFCS problem with @property and structs
1ad35b2 Fix for UFCS with property syntax, and add exhaustive test
96f15a1 Resolve broken build after merging
4712aab fix regression
4e05482 Merge pull request #805 from donc/regression7681
245a107 dt_ functions aren't x86 specific
b35f43a another missing loc in an error() call
001addb minor cleanups
2fb1e46 make util_assert take a const string
907da39 cleanup whitespace in binary(), add binary() that takes the length of the string to search for
59d0425 Merge pull request #804 from braddr/nearsighted
d725eed Merge pull request #806 from donc/ctfe7633equalmsg
12a5c26 Merge pull request #811 from donc/bug7699
4279d5e revert the revert
c895c3b revert pull #809
865fb20 fix Issue 5733 - Calling opDispatch As Template Results in Compiler Infinite Loop
96e16d3 fix Issue 7702 - opDispatch goes into infinite loop
5e343c0 Remove special case for DotIdExp and opDispatch semantic, it isn't need anymore
1a9d607 Fix issue 7699 - Cannot get frame pointer to in contract when compiling with -inline
d1476eb Merge pull request #809 from 9rnsr/fix_funclit
afc7c60 allow out-of-order semantic analysis of fields
17da3a0 fix Issue 7705 - lambda syntax doesn't allow some valid signatures
e29d06d fix issue 7698
911d053 Fix issue 7633 - Missing CTFE error message
3802dde Fix issue 7681 Regression(2.059head):ICE:opCatAssign(delegate) to undefined identifier
8da4121 near-ectomy
cd6dc83 fix Library::error()s format string to take a const char*
f3f03c6 switch to apply()
faf873a fix Issue 3510 - Cannot forward reference a templated type from within a template mixin
23aa2be fix Issue 3509 - Cannot forward reference a template mixin's members in a compile-time context
e81309b Add missing 'loc' to error message.
b6898e3 Fix issue 6681 - struct constructor call is converted to struct literal that breaks union initialization
b79afba long double => longdouble
e48c319 Merge pull request #742 from yebblies/issue5879
d74485a Merge pull request #787 from eco/ddoc-srcfilename
3038cb9 Merge pull request #795 from dawgfoto/fixComment
89a039a Merge pull request #801 from dawgfoto/fix4507
c17c2d8 fix issue 4507
dd86c72 Merge pull request #796 from dawgfoto/fixVC
a516588 Merge pull request #797 from 9rnsr/fix7682
1b9839a Merge pull request #799 from 9rnsr/fix6982
4596774 Merge pull request #800 from 9rnsr/fix_type_deduction
b68d546 forgot about @system
bfe1083 add attributes to toHash
8f819d6 Stop special case in mutableOf/makeMutable with inout type.
319b1a3 Fix the lacks of type merging in Type::mutableOf() and uhSharedOf()
cfe7450 fix Issue 7671 - Broken inout deduction of shared(inout(T[n])) from immutable(int[3])
aca5c37 Stop too eager call of TypeAArray::getImpl() When implicitConvTo(non aa Tstruct => Taarray)
50b2a97 fix Issue 6982 - immutability isn't respected on associative array assignment
a5daa5e fix Issue 7684 - IFTI and shared overload doesn't work
e43fbac fix Issue 7682 - shared array type and "cast() is not an lvalue" error
8191801 cpp_prettyident only needed for C++
4487f75 fix ldval
525647c tparam is the specialization
f893925 fix issue 7592 d847c1c2dd
108b25d Merge pull request #780 from 9rnsr/fix7641
105a51f Merge pull request #784 from 9rnsr/fix7110
8b5b67f Merge pull request #792 from donc/bug7667
f72f237 fix Issue 3682 - Regression(2.038) is expression fails to match types
436b711 Fix issue 7667. ICE(interpret.c): 'ctfeStack.stackPointer() == 0'
9005276 Merge pull request #679 from yebblies/issue783
350a3ce Merge pull request #582 from 9rnsr/fix3382_ufcs
5f020c3 Merge pull request #788 from braddr/cleanup-backend3
6aa91cf Merge pull request #790 from p0nce/master
351d595 remove tls bracketing
a137d72 Fix bug #6391
6ce219c remove some of the bracketing
aec4c13 fix Issue 7578 - ICE on indexing result of vararg opDispatch
95e3dc1 Fix unintended infinite loop in Phobos build
b66196a fix Issue 3382 - [tdpl] Implement uniform function call syntax
ee2fe6c Fix 977 is with counting end-of-lines towards msot advanced lexer peeking
7790b16 fix Issue 7650 - Bad lambda inference in associative array literal
c03484e fix Issue 7649 - Bad lambda inference in default function argument
f293a10 fix Issue 7499 - [ICE] ('cast.c line 1495) with lambda array
9f0622c Expression::inferType() and remove FuncExp::setType()
cfc67b7 refactor lambda inference process
6d49586 more de-TX86'ing in relation to a bunch of OP codes
2efbf6a TX86-ectomy in evalu8.c
953f6d7 rip TX86 conditionals out of el.c
d5663c7 fix Issue 7595 - Data being overwritten.
449c165 Add predefined Ddoc macro SRCFILENAME
5c5da66 fix uninitialized field
29cde54 Merge pull request #783 from 9rnsr/fix7038
06d65ab fix Issue 7038 - Type mismatch with const struct
b77e2c9 fix Issue 7110 - opSlice() & opIndex functions works unstable as template arguments
a65f02f Merge pull request #781 from braddr/fix
08d6cd5 Merge pull request #782 from braddr/fixiasm
2492332 fix latent bug with Lexer::peek and recently introduced bug in Lexer::scan
ec1888e initialize popndTmp rather than rely on carefulness when usNumops == 0 and emitting a vector instruction, popndTmp is left uninitialized and is later dereferenced.
1d4a742 Merge pull request #766 from 9rnsr/fix7563
e1cd535 refactor
90f8dcf fix Issue 7641 - std.typecons.Proxy incorrectly allows implicit conversion to class
83a93cf Merge pull request #778 from dawgfoto/MoreSpellCorrection
7f0bcde 2nd go at fix issue 5590
567d7df fix Issue 5590 - Regression(2.036) ICE(e2ir.c): when using .values on enum which is associative array
48ea951 fix Issue 4820 - Regression(1.058, 2.044) in DStress caused by changeset 452
e8f9f3b more spell correction
afd9a45 fix Issue 7618 - delegate/function pointer call bypass parameter storage class
dabcdfb Merge pull request #773 from 9rnsr/fix7583
9846bb2 Merge pull request #774 from donc/ctfe7568
8c20445 Merge pull request #775 from donc/_error6785
d41e58e Avoiding shallow copy is more better.
cccef09 Revert "fix Issue 7585 - functions in templates inferred as delegate"
fc8dfc0 6785 Wrong error message from pragma(msg) of failed instantiation
61ec04d 7568 pragma(msg) segfaults with an aggregate including a class.
4d86d39 Merge pull request #767 from 9rnsr/fix7585
207d351 fix Issue 7583 - [CTFE] ICE with tuple and alias this
53bafa2 fix Issue 7411 - Deduce base type from vector types in templates
5ab1bd9 fix Issue 7518 - std.array.empty doesn't work for shared arrays
a1030d3 fix Issue 7554 - Immutable function pointer arguments too
5e96900 Merge pull request #771 from donc/bug7589
2287ebc fix Issue 7547 - -deps output lists object as a top level module
e611781 7589 __traits(compiles) does not work with a template that fails to compile
0113cde fix Issue 7585 - functions in templates inferred as delegate
4b978d5 fix Issue 7563 - Class members with default template arguments have no type
4d68981 fix Issue 7500 - [ICE] (template.c line 5287) with immutable lambda function
1a39c3c missed a line
6dd89ca Merge pull request #765 from 9rnsr/fix7525
8d6dcac fix Issue 7502 - 2.056 regression: Assigning .init takes forever to compile for large structs
042096e fix Issue 7525 - Broken return type inference for delegate returns
c5affa5 fix Issue 7582 - Untyped nested delegate literals don't compile
121677c fix Issue 7580 - Identity assignment of Nullable crashes dmd
adc0502 Small refactoring to resolve alias this.
1f52383 Merge pull request #671 from yebblies/issue4958
2a12345 fix build breakage
8755819 fix build
ba86204 fix vcbuild
464c664 fix linux build
31197c8 tweaked command line moved some inline asm to C-function to not interfere with optimizations build with VS2011
4dcdc9c increase stack size for win64 build
77262aa add missing include to root
56afe3f batch to build through win32.mak
5a0fd30 build through win32.mak
a5b5190 long_double -> longdouble remove C99 printf add Win64 support
9640110 vcbuild
b619171 Merge pull request #761 from donc/ctfe7473structref
7756328 Merge pull request #725 from kennytm/bug7399-import-too-fatal
bbac9e4 Merge pull request #759 from yebblies/issue1149
d1ff23b 7473 [CTFE] Non-ref argument behaves as if it's a ref argument
ab5cb18 Fix OPmsw codegen - integer only is too restrictive.
a00833b Merge pull request #743 from yebblies/issue3354
b006e11 Merge pull request #757 from 9rnsr/fix7562
3bccbb0 fix Issue 7562 - DMD crashes by using TemplateThisParameter
a7dc50e Merge pull request #749 from yebblies/issue1149
a873c5f Merge pull request #758 from 9rnsr/fix5525
5d639ec fix Issue 5525 - Eponymous templates should allow for overloaded eponymous members
f50852c Merge pull request #729 from donc/gag4269
de02523 fix Issue 3927 - array.length++; is an error, but ++array.length compiles
1dc5bfd Merge pull request #680 from yebblies/issue3812
cf887ba move errors to Dsymbol
fc4acf5 Merge pull request #755 from donc/seaOfErrors7557
be2f3a9 7557b soldier on through dottemplate expressions
8cec825 7557 Sea of errors after template failure
37ec6d6 A small fixup to call Type::defaultInitLiteral
7b5e2cb Revert "Revert "Merge pull request #41 from 9rnsr/rvalue-struct-literal""
3d8f09a Merge branch 'master' of github.com:D-Programming-Language/dmd
7dfb4cc Merge pull request #752 from braddr/cleanup-backend2
1b28f51 Merge branch 'master' of github.com:D-Programming-Language/dmd
31ad73c Merge pull request #746 from yebblies/issue5554
25f770d Change lexer to support # as a token, preserving #line's original behavior
dd8d20a Revert "Merge pull request #41 from 9rnsr/rvalue-struct-literal"
ee2fdf9 Merge pull request #41 from 9rnsr/rvalue-struct-literal
f94fdbf Merge pull request #750 from yebblies/issue3630
61f5fcf Improve codegen for OPmsw
05a3fa4 Merge pull request #744 from Safety0ff/avx-fix
0231d6a Merge pull request #748 from 9rnsr/fix7552
9a97979 Merge pull request #751 from donc/ctfe7536
e091e6e 7536 ctfeAdrOnStack triggered
c9edaf4 fix Issue 7552 - Cannot get and combine a part of overloaded functions
1edeba9 Fix Issue 3630 - bad error location in "has no effect in expression" error
7d0fb72 Fix Issue 5554 - [qtd] Covariance detection failure
4f36aca fix Issue 7550 - Missing AVX instruction VPMULDQ
0b82dfe Fix Issue 5879 - Not all frontend errors use stderr
963a41a Merge pull request #695 from yebblies/refactor_expression
3f06690 Fix Issue 3354 - asm fld x, ST(6); accepted
713f69f Merge pull request #677 from yebblies/issue4241
cf22ce3 Merge pull request #711 from yebblies/issue3559
56ca73c Merge pull request #700 from kennytm/bug7452_lazy_safe
c4dc723 Merge pull request #736 from ibuclaw/in_gcc
121c9b9 Merge pull request #737 from yebblies/issue7544
cedcb3c Merge pull request #740 from yebblies/issue7545
fb3e8f2 Merge pull request #741 from dawgfoto/DMCWarning
5d26c1e Merge pull request #735 from 9rnsr/fix7105
734a921 dmc warning
1e1cfbc Fix Issue 7545 - ICE(cast.c) Merge integral types through alias this
6b135be Fix Issue 7544 - ICE(interpret.c) Catching an exception with a null catch block
c5336f9 Update already existing gdc-specific code, harmonise headers.
44b8d59 Merge pull request #703 from kennytm/bug435_template_ctor
6b368e1 Merge pull request #707 from yebblies/issue3822
8439e07 Merge pull request #717 from yebblies/issue6611
2b4502e fix Issue 7105 - relax inout rules
ac4463a wildsubparam isn't need anymore, because it works properly.
f77879a Issue 6611 - better error message for array post increment/decrement
7393395 Merge pull request #716 from yebblies/issue6685
77568f0 Merge pull request #719 from yebblies/issue4536
9accb04 tired of tdata()
5fbd5a2 Merge pull request #732 from dawgfoto/fix5412
41a901a Revert "hide private/package module level symbols"
23d5e14 Merge pull request #733 from dawgfoto/HideModuleMembers
e2f8a23 hide private/package module level symbols
ae75287 detect collisions with renamed imports
75a2442 fix Dsymbol::search_correct
50e122a Merge pull request #723 from kennytm/bug7504_null_array
c5b7601 Revert "fix 7494 - selective imports in function scope"
aa6f4d9 Revert "fix Protection"
5be660e Revert "fix Imports"
040371b Revert "detect collisions with renamed imports"
0159818 Revert "find private symbols during spell correction"
0c95c45 find private symbols during spell correction
ca22fb2 detect collisions with renamed imports
0dca0af fix Imports
37d4fda fix Protection
16a2e7e fix 7494 - selective imports in function scope
c16f5b2 Merge pull request #667 from 9rnsr/fix7406
f776617 explanatory comments belong in the code, not bugzilla
bfa2060 Merge pull request #704 from donc/_error6699
f46705c fix fail222 regression
28d9635 Merge pull request #708 from donc/soldieron7481
2c2a7af Merge pull request #715 from 9rnsr/fix6738
98cfa64 Merge pull request #722 from 9rnsr/fix7353
b040567 revert pull 724
0e84f63 revert part of pull 724
400f702 Merge pull request #724 from yebblies/issue3632
d82cc74 Merge pull request #720 from yebblies/issue3279
2da3bed Merge pull request #718 from yebblies/fixdebugmsg
f6627ec 7527 [CTFE] Segfault when slicing a pointer at compile time
c8f09bf 4269a Regression(2.031): invalid type accepted if evaluated while errors are gagged
d10fba0 implement const/purity/nothrow/@safe inheritance
ad689fb Fix bug 7399: Broken import statement in trySemantic() causes silent compiler error
eb0c643 Add global.speculativeGag
c18220a Refactor isSpeculativeFunction into Dsymbol
f5c56d8 Issue 3632 - modify float is float to do a bitwise compare
af1cab4 Issue 7353 - NRVO not properly working with inferred return type
03ee438 Fix bug 7504: Cannot assign an object of type 'typeof(null)' to an array
dfb941c Remove debug printing in code that generates errors.
62118e3 Issue 4536 - Typetuples (T...) should have an .init member
989da7b Issue 3279 - Confusing error message when comparing types
36e8045 Issue 6685 - Allow using "with" with rvalues
60cbc6f fix issue 6738 revisited
4e20e7d Issue 3822 - Invalid optimization of alloca called with constant size
b37bf8c Fixes bug 435: Constructors should be templatized
ad8157d Issue 3559 - DMD 1.048+ fails to take function pointer from overloaded member functions
838cd06 7481 Compiler should 'soldier on' after template errors
673063e Simplify fix for 6699
1a0b199 6699a __error when instantiating function template
b6d072d 6699b __error in alias expression
df16ffa 6699c __error in synchronized error message
338f804 7462 Error message with _error_ in overridden function
0f60bd3 7463 Duplicated error message with bad template value parameter
f43e93a 6699E: _error inside error msg for bad base class
5109a5a Fixes bug 7452.
04d888f Refactor XxxAssignExp semantic
73973d6 Issue 3812 - Missing line number for implicit cast of variadic function to array
f0bbf18 Issue 3927 - array.length++; is an error, but ++array.length compiles
24576c2 Issue 783 - Cannot use an array w/ const or variable index as new[] size argument.
7e4cd4b Issue 4241 - duplicate union initialization error doesn't give a file location
9987127 Issue 4958 - Floating point enums should check for total loss of precision
60287fd Issue 7406 - tuple foreach doesn't work with mixed tuples
633d88e Issue 5889 - Struct literal/construction should be rvalue
5d5f78a Now function overloading with ref and non-ref parameter is legal for struct type
git-subtree-dir: dmd2
git-subtree-split: 3443f38fc4c17807a0f36005a05d598cfc7301db
6459 lines
207 KiB
C
6459 lines
207 KiB
C
// Copyright (C) 1984-1998 by Symantec
|
||
// Copyright (C) 2000-2011 by Digital Mars
|
||
// All Rights Reserved
|
||
// http://www.digitalmars.com
|
||
// Written by Walter Bright
|
||
/*
|
||
* This source file is made available for personal use
|
||
* only. The license is in /dmd/src/dmd/backendlicense.txt
|
||
* or /dm/src/dmd/backendlicense.txt
|
||
* For any other uses, please contact Digital Mars.
|
||
*/
|
||
|
||
#if !SPP
|
||
|
||
#include <stdio.h>
|
||
#include <string.h>
|
||
#include <stdlib.h>
|
||
#include <time.h>
|
||
#include "cc.h"
|
||
#include "el.h"
|
||
#include "code.h"
|
||
#include "oper.h"
|
||
#include "global.h"
|
||
#include "type.h"
|
||
#include "tinfo.h"
|
||
#if SCPP
|
||
#include "exh.h"
|
||
#endif
|
||
|
||
#if HYDRATE
|
||
#include "parser.h"
|
||
#endif
|
||
|
||
static char __file__[] = __FILE__; /* for tassert.h */
|
||
#include "tassert.h"
|
||
|
||
extern targ_size_t retsize;
|
||
STATIC void pinholeopt_unittest();
|
||
STATIC void do8bit (enum FL,union evc *);
|
||
STATIC void do16bit (enum FL,union evc *,int);
|
||
STATIC void do32bit (enum FL,union evc *,int,targ_size_t = 0);
|
||
STATIC void do64bit (enum FL,union evc *,int);
|
||
|
||
static int hasframe; /* !=0 if this function has a stack frame */
|
||
static targ_size_t Foff; // BP offset of floating register
|
||
static targ_size_t CSoff; // offset of common sub expressions
|
||
static targ_size_t NDPoff; // offset of saved 8087 registers
|
||
int BPoff; // offset from BP
|
||
static int EBPtoESP; // add to EBP offset to get ESP offset
|
||
static int AAoff; // offset of alloca temporary
|
||
|
||
/* Select the segment/offset pair named by JMPSEG/JMPOFF:
 * CDATA/CDoffset for ELF and Mach-O object formats, DATA/Doffset otherwise.
 */
#if !ELFOBJ && !MACHOBJ
#define JMPSEG  DATA
#define JMPOFF  Doffset
#else
#define JMPSEG  CDATA
#define JMPOFF  CDoffset
#endif
|
||
|
||
/*************
 * Flag bits used by the instruction size tables below.
 * Each table entry describes one opcode; an entry of 0 means the
 * instruction is illegal.
 *
 *      M       there is a modregrm field (EV1 is reserved for modregrm)
 *      T       there is a second operand (EV2)
 *      E       the second operand is only 8 bits
 *      A       a short version exists for the AX reg
 *      R       a short version exists for regs
 *      W       defined as 0, so OR-ing it into an entry leaves it unchanged
 *
 * Bits 2..0 of an entry hold the size of the instruction in bytes
 * (excluding optional bytes).
 */

#define M       0x80
#define T       0x40
#define E       0x20
#define A       0x10
#define R       0x08
#define W       0
|
||
|
||
static unsigned char inssize[256] =
|
||
{ M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 00 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 08 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 10 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 18 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 20 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 28 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 30 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 38 */
|
||
1,1,1,1, 1,1,1,1, /* 40 */
|
||
1,1,1,1, 1,1,1,1, /* 48 */
|
||
1,1,1,1, 1,1,1,1, /* 50 */
|
||
1,1,1,1, 1,1,1,1, /* 58 */
|
||
1,1,M|2,M|2, 1,1,1,1, /* 60 */
|
||
T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1, /* 68 */
|
||
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */
|
||
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */
|
||
M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */
|
||
M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2, /* 88 */
|
||
1,1,1,1, 1,1,1,1, /* 90 */
|
||
1,1,T|5,1, 1,1,1,1, /* 98 */
|
||
#if 0 /* cod3_set32() patches this */
|
||
T|5,T|5,T|5,T|5, 1,1,1,1, /* A0 */
|
||
#else
|
||
T|3,T|3,T|3,T|3, 1,1,1,1, /* A0 */
|
||
#endif
|
||
T|E|2,T|3,1,1, 1,1,1,1, /* A8 */
|
||
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */
|
||
T|3,T|3,T|3,T|3, T|3,T|3,T|3,T|3, /* B8 */
|
||
M|T|E|3,M|T|E|3,T|3,1, M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */
|
||
T|E|4,1,T|3,1, 1,T|E|2,1,1, /* C8 */
|
||
M|2,M|2,M|2,M|2, T|E|2,T|E|2,0,1, /* D0 */
|
||
/* For the floating instructions, allow room for the FWAIT */
|
||
M|2,M|2,M|2,M|2, M|2,M|2,M|2,M|2, /* D8 */
|
||
T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */
|
||
T|3,T|3,T|5,T|E|2, 1,1,1,1, /* E8 */
|
||
1,0,1,1, 1,1,M|A|2,M|A|2, /* F0 */
|
||
1,1,1,1, 1,1,M|2,M|R|2 /* F8 */
|
||
};
|
||
|
||
/* Plain byte counts (no flag bits) for every one-byte opcode, indexed by
 * opcode.  An entry of 0 marks an opcode with no size assigned.
 */
static const unsigned char inssize32[256] =
{
    2, 2, 2, 2,   2, 5, 1, 1,     // 00
    2, 2, 2, 2,   2, 5, 1, 1,     // 08
    2, 2, 2, 2,   2, 5, 1, 1,     // 10
    2, 2, 2, 2,   2, 5, 1, 1,     // 18
    2, 2, 2, 2,   2, 5, 1, 1,     // 20
    2, 2, 2, 2,   2, 5, 1, 1,     // 28
    2, 2, 2, 2,   2, 5, 1, 1,     // 30
    2, 2, 2, 2,   2, 5, 1, 1,     // 38
    1, 1, 1, 1,   1, 1, 1, 1,     // 40
    1, 1, 1, 1,   1, 1, 1, 1,     // 48
    1, 1, 1, 1,   1, 1, 1, 1,     // 50
    1, 1, 1, 1,   1, 1, 1, 1,     // 58
    1, 1, 2, 2,   1, 1, 1, 1,     // 60
    5, 6, 2, 3,   1, 1, 1, 1,     // 68
    2, 2, 2, 2,   2, 2, 2, 2,     // 70
    2, 2, 2, 2,   2, 2, 2, 2,     // 78
    3, 6, 3, 3,   2, 2, 2, 2,     // 80
    2, 2, 2, 2,   2, 2, 2, 2,     // 88
    1, 1, 1, 1,   1, 1, 1, 1,     // 90
    1, 1, 7, 1,   1, 1, 1, 1,     // 98
    5, 5, 5, 5,   1, 1, 1, 1,     // A0
    2, 5, 1, 1,   1, 1, 1, 1,     // A8
    2, 2, 2, 2,   2, 2, 2, 2,     // B0
    5, 5, 5, 5,   5, 5, 5, 5,     // B8
    3, 3, 3, 1,   2, 2, 3, 6,     // C0
    4, 1, 3, 1,   1, 2, 1, 1,     // C8
    2, 2, 2, 2,   2, 2, 0, 1,     // D0
    // For the floating instructions, don't need room for the FWAIT
    2, 2, 2, 2,   2, 2, 2, 2,     // D8

    2, 2, 2, 2,   2, 2, 2, 2,     // E0
    5, 5, 7, 2,   1, 1, 1, 1,     // E8
    1, 0, 1, 1,   1, 1, 2, 2,     // F0
    1, 1, 1, 1,   1, 1, 2, 2      // F8
};
|
||
|
||
/* For 2 byte opcodes starting with 0x0F */
|
||
static unsigned char inssize2[256] =
|
||
{ M|3,M|3,M|3,M|3, 2,2,2,2, // 00
|
||
2,2,M|3,2, 2,2,2,M|T|E|4, // 08
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 10
|
||
M|3,2,2,2, 2,2,2,2, // 18
|
||
M|3,M|3,M|3,M|3, M|3,2,M|3,2, // 20
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 28
|
||
2,2,2,2, 2,2,2,2, // 30
|
||
M|4,2,M|T|E|5,2, 2,2,2,2, // 38
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 40
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 48
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 50
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 58
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 60
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 68
|
||
M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
|
||
2,2,2,2, M|3,M|3,M|3,M|3, // 78
|
||
W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
|
||
W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 90
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 98
|
||
2,2,2,M|3, M|T|E|4,M|3,2,2, // A0
|
||
2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8
|
||
M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0
|
||
M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8
|
||
M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
|
||
2,2,2,2, 2,2,2,2, // C8
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8
|
||
};
|
||
|
||
/*************************************************
|
||
* Allocate register temporaries
|
||
*/
|
||
|
||
code *REGSAVE::save(code *c, int reg, unsigned *pidx)
|
||
{
|
||
unsigned i;
|
||
if (reg >= XMM0)
|
||
{
|
||
alignment = 16;
|
||
idx = (idx + 15) & ~15;
|
||
i = idx;
|
||
idx += 16;
|
||
// MOVD idx[RBP],xmm
|
||
c = genc1(c,0xF20F11,modregxrm(2, reg - XMM0, BPRM),FLregsave,(targ_uns) i);
|
||
}
|
||
else
|
||
{
|
||
if (!alignment)
|
||
alignment = REGSIZE;
|
||
i = idx;
|
||
idx += REGSIZE;
|
||
// MOV idx[RBP],reg
|
||
c = genc1(c,0x89,modregxrm(2, reg, BPRM),FLregsave,(targ_uns) i);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
reflocal = TRUE;
|
||
if (idx > top)
|
||
top = idx; // keep high water mark
|
||
*pidx = i;
|
||
return c;
|
||
}
|
||
|
||
/*************************************************
 * Append the instruction that reloads reg from the register save area.
 * Input:
 *      c       code list to append to
 *      reg     register to reload; values >= XMM0 are treated as XMM registers
 *      idx     slot offset, as produced through pidx by REGSAVE::save()
 * Returns:
 *      the code list returned by genc1()
 */
code *REGSAVE::restore(code *c, int reg, unsigned idx)
{
    if (reg < XMM0)
    {
        // MOV reg,idx[RBP]
        c = genc1(c,0x8B,modregxrm(2, reg, BPRM),FLregsave,(targ_uns) idx);
        if (I64)
            code_orrex(c, REX_W);
        return c;
    }

    // An XMM save must have happened first; save() sets alignment to 16 for those
    assert(alignment == 16);

    // MOVSD xmm,idx[RBP]       (opcode F2 0F 10)
    return genc1(c,0xF20F10,modregxrm(2, reg - XMM0, BPRM),FLregsave,(targ_uns) idx);
}
|
||
|
||
/************************************
|
||
* Size for vex encoded instruction.
|
||
*/
|
||
|
||
unsigned char vex_inssize(code *c)
|
||
{
|
||
assert(c->Iflags & CFvex);
|
||
unsigned char ins;
|
||
if (c->Iflags & CFvex3)
|
||
{
|
||
switch (c->Ivex.mmmm)
|
||
{
|
||
case 0: // no prefix
|
||
case 1: // 0F
|
||
ins = inssize2[c->Ivex.op] + 2;
|
||
break;
|
||
case 2: // 0F 38
|
||
ins = inssize2[0x38] + 1;
|
||
break;
|
||
case 3: // 0F 3A
|
||
ins = inssize2[0x3A] + 1;
|
||
break;
|
||
default:
|
||
assert(0);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
ins = inssize2[c->Ivex.op] + 1;
|
||
}
|
||
return ins;
|
||
}
|
||
|
||
/************************************
|
||
* Determine if there is a modregrm byte for code.
|
||
*/
|
||
|
||
int cod3_EA(code *c)
|
||
{ unsigned ins;
|
||
|
||
unsigned op1 = c->Iop & 0xFF;
|
||
if (op1 == ESCAPE)
|
||
ins = 0;
|
||
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
|
||
ins = inssize2[(c->Iop >> 8) & 0xFF];
|
||
else if ((c->Iop & 0xFF00) == 0x0F00)
|
||
ins = inssize2[op1];
|
||
else
|
||
ins = inssize[op1];
|
||
return ins & M;
|
||
}
|
||
|
||
/********************************
|
||
* Fix global variables for 386.
|
||
*/
|
||
|
||
void cod3_set32()
|
||
{
|
||
inssize[0xA0] = T|5;
|
||
inssize[0xA1] = T|5;
|
||
inssize[0xA2] = T|5;
|
||
inssize[0xA3] = T|5;
|
||
BPRM = 5; /* [EBP] addressing mode */
|
||
fregsaved = mBP | mBX | mSI | mDI; // saved across function calls
|
||
FLOATREGS = FLOATREGS_32;
|
||
FLOATREGS2 = FLOATREGS2_32;
|
||
DOUBLEREGS = DOUBLEREGS_32;
|
||
if (config.flags3 & CFG3eseqds)
|
||
fregsaved |= mES;
|
||
|
||
for (unsigned i = 0x80; i < 0x90; i++)
|
||
inssize2[i] = W|T|6;
|
||
}
|
||
|
||
/********************************
|
||
* Fix global variables for I64.
|
||
*/
|
||
|
||
void cod3_set64()
|
||
{
|
||
inssize[0xA0] = T|5; // MOV AL,mem
|
||
inssize[0xA1] = T|5; // MOV RAX,mem
|
||
inssize[0xA2] = T|5; // MOV mem,AL
|
||
inssize[0xA3] = T|5; // MOV mem,RAX
|
||
BPRM = 5; // [RBP] addressing mode
|
||
|
||
fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls
|
||
FLOATREGS = FLOATREGS_64;
|
||
FLOATREGS2 = FLOATREGS2_64;
|
||
DOUBLEREGS = DOUBLEREGS_64;
|
||
STACKALIGN = 16;
|
||
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
|
||
BYTEREGS = ALLREGS;
|
||
#endif
|
||
|
||
for (unsigned i = 0x80; i < 0x90; i++)
|
||
inssize2[i] = W|T|6;
|
||
}
|
||
|
||
/*********************************
|
||
* Word or dword align start of function.
|
||
*/
|
||
|
||
void cod3_align()
|
||
{
|
||
static unsigned char nops[7] = { 0x90,0x90,0x90,0x90,0x90,0x90,0x90 };
|
||
unsigned nbytes;
|
||
#if OMFOBJ
|
||
if (config.flags4 & CFG4speed) // if optimized for speed
|
||
{
|
||
// Pick alignment based on CPU target
|
||
if (config.target_cpu == TARGET_80486 ||
|
||
config.target_cpu >= TARGET_PentiumPro)
|
||
{ // 486 does reads on 16 byte boundaries, so if we are near
|
||
// such a boundary, align us to it
|
||
|
||
nbytes = -Coffset & 15;
|
||
if (nbytes < 8)
|
||
{
|
||
Coffset += obj_bytes(cseg,Coffset,nbytes,nops); // XCHG AX,AX
|
||
}
|
||
}
|
||
}
|
||
#else
|
||
nbytes = -Coffset & 3;
|
||
//dbg_printf("cod3_align Coffset %x nbytes %d\n",Coffset,nbytes);
|
||
obj_bytes(cseg,Coffset,nbytes,nops);
|
||
#endif
|
||
}
|
||
|
||
/*****************************
|
||
* Given a type, return a mask of
|
||
* registers to hold that type.
|
||
* Input:
|
||
* tyf function type
|
||
*/
|
||
|
||
regm_t regmask(tym_t tym, tym_t tyf)
|
||
{
|
||
switch (tybasic(tym))
|
||
{
|
||
case TYvoid:
|
||
case TYstruct:
|
||
return 0;
|
||
case TYbool:
|
||
case TYwchar_t:
|
||
case TYchar16:
|
||
case TYchar:
|
||
case TYschar:
|
||
case TYuchar:
|
||
case TYshort:
|
||
case TYushort:
|
||
case TYint:
|
||
case TYuint:
|
||
#if JHANDLE
|
||
case TYjhandle:
|
||
#endif
|
||
case TYnullptr:
|
||
case TYnptr:
|
||
#if TARGET_SEGMENTED
|
||
case TYsptr:
|
||
case TYcptr:
|
||
#endif
|
||
return mAX;
|
||
|
||
case TYfloat:
|
||
case TYifloat:
|
||
if (I64)
|
||
return mXMM0;
|
||
if (config.exe & EX_flat)
|
||
return mST0;
|
||
case TYlong:
|
||
case TYulong:
|
||
case TYdchar:
|
||
if (!I16)
|
||
return mAX;
|
||
#if TARGET_SEGMENTED
|
||
case TYfptr:
|
||
case TYhptr:
|
||
#endif
|
||
return mDX | mAX;
|
||
|
||
case TYcent:
|
||
case TYucent:
|
||
assert(I64);
|
||
return mDX | mAX;
|
||
|
||
#if TARGET_SEGMENTED
|
||
case TYvptr:
|
||
return mDX | mBX;
|
||
#endif
|
||
|
||
case TYdouble:
|
||
case TYdouble_alias:
|
||
case TYidouble:
|
||
if (I64)
|
||
return mXMM0;
|
||
if (config.exe & EX_flat)
|
||
return mST0;
|
||
return DOUBLEREGS;
|
||
|
||
case TYllong:
|
||
case TYullong:
|
||
return I64 ? mAX : (I32 ? mDX | mAX : DOUBLEREGS);
|
||
|
||
case TYldouble:
|
||
case TYildouble:
|
||
return mST0;
|
||
|
||
case TYcfloat:
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (I32 && tybasic(tyf) == TYnfunc)
|
||
return mDX | mAX;
|
||
#endif
|
||
case TYcdouble:
|
||
if (I64)
|
||
return mXMM0 | mXMM1;
|
||
case TYcldouble:
|
||
return mST01;
|
||
|
||
// SIMD vector types
|
||
case TYfloat4:
|
||
case TYdouble2:
|
||
case TYschar16:
|
||
case TYuchar16:
|
||
case TYshort8:
|
||
case TYushort8:
|
||
case TYlong4:
|
||
case TYulong4:
|
||
case TYllong2:
|
||
case TYullong2:
|
||
if (!config.fpxmmregs)
|
||
{ printf("SIMD operations not supported on this platform\n");
|
||
exit(1);
|
||
}
|
||
return mXMM0;
|
||
|
||
default:
|
||
#if DEBUG
|
||
WRTYxx(tym);
|
||
#endif
|
||
assert(0);
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
/*******************************
|
||
* Generate block exit code
|
||
*/
|
||
void outblkexitcode(block *bl, code*& c, int& anyspill, const char* sflsave, symbol** retsym, const regm_t mfuncregsave)
|
||
{
|
||
elem *e = bl->Belem;
|
||
block *nextb;
|
||
block *bs1,*bs2;
|
||
regm_t retregs = 0;
|
||
bool jcond;
|
||
|
||
switch (bl->BC) /* block exit condition */
|
||
{
|
||
case BCiftrue:
|
||
jcond = TRUE;
|
||
bs1 = list_block(bl->Bsucc);
|
||
bs2 = list_block(list_next(bl->Bsucc));
|
||
if (bs1 == bl->Bnext)
|
||
{ // Swap bs1 and bs2
|
||
block *btmp;
|
||
|
||
jcond ^= 1;
|
||
btmp = bs1;
|
||
bs1 = bs2;
|
||
bs2 = btmp;
|
||
}
|
||
c = cat(c,logexp(e,jcond,FLblock,(code *) bs1));
|
||
nextb = bs2;
|
||
bl->Bcode = NULL;
|
||
L2:
|
||
if (nextb != bl->Bnext)
|
||
{ if (configv.addlinenumbers && bl->Bsrcpos.Slinnum &&
|
||
!(funcsym_p->ty() & mTYnaked))
|
||
cgen_linnum(&c,bl->Bsrcpos);
|
||
assert(!(bl->Bflags & BFLepilog));
|
||
c = cat(c,genjmp(CNIL,JMP,FLblock,nextb));
|
||
}
|
||
bl->Bcode = cat(bl->Bcode,c);
|
||
break;
|
||
case BCjmptab:
|
||
case BCifthen:
|
||
case BCswitch:
|
||
assert(!(bl->Bflags & BFLepilog));
|
||
doswitch(bl); /* hide messy details */
|
||
bl->Bcode = cat(c,bl->Bcode);
|
||
break;
|
||
#if MARS
|
||
case BCjcatch:
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in catch blocks.
|
||
c = cat(c,getregs((I32 | I64) ? allregs : (ALLREGS | mES)));
|
||
#if 0 && TARGET_LINUX
|
||
if (config.flags3 & CFG3pic && !(allregs & mBX))
|
||
{
|
||
c = cat(c, cod3_load_got());
|
||
}
|
||
#endif
|
||
goto case_goto;
|
||
#endif
|
||
#if SCPP
|
||
case BCcatch:
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in catch blocks.
|
||
c = cat(c,getregs(allregs | mES));
|
||
#if 0 && TARGET_LINUX
|
||
if (config.flags3 & CFG3pic && !(allregs & mBX))
|
||
{
|
||
c = cat(c, cod3_load_got());
|
||
}
|
||
#endif
|
||
goto case_goto;
|
||
|
||
case BCtry:
|
||
usednteh |= EHtry;
|
||
if (config.flags2 & CFG2seh)
|
||
usednteh |= NTEHtry;
|
||
goto case_goto;
|
||
#endif
|
||
case BCgoto:
|
||
nextb = list_block(bl->Bsucc);
|
||
if ((funcsym_p->Sfunc->Fflags3 & Fnteh ||
|
||
(MARS /*&& config.flags2 & CFG2seh*/)) &&
|
||
bl->Btry != nextb->Btry &&
|
||
nextb->BC != BC_finally)
|
||
{ int toindex;
|
||
int fromindex;
|
||
|
||
bl->Bcode = NULL;
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
toindex = nextb->Btry ? nextb->Btry->Bscope_index : -1;
|
||
assert(bl->Btry);
|
||
fromindex = bl->Btry->Bscope_index;
|
||
#if MARS
|
||
if (toindex + 1 == fromindex)
|
||
{ // Simply call __finally
|
||
if (bl->Btry &&
|
||
list_block(list_next(bl->Btry->Bsucc))->BC == BCjcatch)
|
||
{
|
||
goto L2;
|
||
}
|
||
}
|
||
#endif
|
||
if (config.flags2 & CFG2seh)
|
||
c = cat(c,nteh_unwind(0,toindex));
|
||
#if MARS && (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS)
|
||
else if (toindex + 1 <= fromindex)
|
||
{
|
||
//c = cat(c, linux_unwind(0, toindex));
|
||
block *bt;
|
||
|
||
//printf("B%d: fromindex = %d, toindex = %d\n", bl->Bdfoidx, fromindex, toindex);
|
||
bt = bl;
|
||
while ((bt = bt->Btry) != NULL && bt->Bscope_index != toindex)
|
||
{ block *bf;
|
||
|
||
//printf("\tbt->Bscope_index = %d, bt->Blast_index = %d\n", bt->Bscope_index, bt->Blast_index);
|
||
bf = list_block(list_next(bt->Bsucc));
|
||
// Only look at try-finally blocks
|
||
if (bf->BC == BCjcatch)
|
||
continue;
|
||
|
||
if (bf == nextb)
|
||
continue;
|
||
//printf("\tbf = B%d, nextb = B%d\n", bf->Bdfoidx, nextb->Bdfoidx);
|
||
if (nextb->BC == BCgoto &&
|
||
!nextb->Belem &&
|
||
bf == list_block(nextb->Bsucc))
|
||
continue;
|
||
|
||
// call __finally
|
||
code *cs;
|
||
code *cr;
|
||
int nalign = 0;
|
||
|
||
gensaverestore(retregs,&cs,&cr);
|
||
if (STACKALIGN == 16)
|
||
{ int npush = (numbitsset(retregs) + 1) * REGSIZE;
|
||
if (npush & (STACKALIGN - 1))
|
||
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
|
||
cs = genc2(cs,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
}
|
||
cs = genc(cs,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bf->Bsucc));
|
||
if (nalign)
|
||
{ cs = genc2(cs,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
c = cat3(c,cs,cr);
|
||
}
|
||
}
|
||
#endif
|
||
goto L2;
|
||
}
|
||
case_goto:
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
if (anyspill)
|
||
{ // Add in the epilog code
|
||
code *cstore = NULL;
|
||
code *cload = NULL;
|
||
|
||
for (int i = 0; i < anyspill; i++)
|
||
{ symbol *s = globsym.tab[i];
|
||
|
||
if (s->Sflags & SFLspill &&
|
||
vec_testbit(dfoidx,s->Srange))
|
||
{
|
||
s->Sfl = sflsave[i]; // undo block register assignments
|
||
cgreg_spillreg_epilog(bl,s,&cstore,&cload);
|
||
}
|
||
}
|
||
c = cat3(c,cstore,cload);
|
||
}
|
||
|
||
L3:
|
||
bl->Bcode = NULL;
|
||
nextb = list_block(bl->Bsucc);
|
||
goto L2;
|
||
|
||
case BC_try:
|
||
if (config.flags2 & CFG2seh)
|
||
{ usednteh |= NTEH_try;
|
||
nteh_usevars();
|
||
}
|
||
else
|
||
usednteh |= EHtry;
|
||
goto case_goto;
|
||
|
||
case BC_finally:
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in finally blocks.
|
||
assert(!getregs(allregs));
|
||
assert(!e);
|
||
assert(!bl->Bcode);
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic)
|
||
{
|
||
int nalign = 0;
|
||
if (STACKALIGN == 16)
|
||
{ nalign = STACKALIGN - REGSIZE;
|
||
c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
// CALL bl->Bsucc
|
||
c = genc(c,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bl->Bsucc));
|
||
if (nalign)
|
||
{ c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
// JMP list_next(bl->Bsucc)
|
||
nextb = list_block(list_next(bl->Bsucc));
|
||
goto L2;
|
||
}
|
||
else
|
||
#endif
|
||
{
|
||
// Generate a PUSH of the address of the successor to the
|
||
// corresponding BC_ret
|
||
//assert(list_block(list_next(bl->Bsucc))->BC == BC_ret);
|
||
// PUSH &succ
|
||
c = genc(c,0x68,0,0,0,FLblock,(targ_size_t)list_block(list_next(bl->Bsucc)));
|
||
nextb = list_block(bl->Bsucc);
|
||
goto L2;
|
||
}
|
||
|
||
case BC_ret:
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
bl->Bcode = gen1(c,0xC3); // RET
|
||
break;
|
||
|
||
#if NTEXCEPTIONS
|
||
case BC_except:
|
||
assert(!e);
|
||
usednteh |= NTEH_except;
|
||
c = cat(c,nteh_setsp(0x8B));
|
||
getregs(allregs);
|
||
goto L3;
|
||
|
||
case BC_filter:
|
||
c = cat(c,nteh_filter(bl));
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in filter blocks.
|
||
getregs(allregs);
|
||
retregs = regmask(e->Ety, TYnfunc);
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
bl->Bcode = gen1(c,0xC3); // RET
|
||
break;
|
||
#endif
|
||
|
||
case BCretexp:
|
||
retregs = regmask(e->Ety, funcsym_p->ty());
|
||
|
||
// For the final load into the return regs, don't set regcon.used,
|
||
// so that the optimizer can potentially use retregs for register
|
||
// variable assignments.
|
||
|
||
if (config.flags4 & CFG4optimized)
|
||
{ regm_t usedsave;
|
||
|
||
c = cat(c,docommas(&e));
|
||
usedsave = regcon.used;
|
||
if (EOP(e))
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
else
|
||
{
|
||
if (e->Eoper == OPconst)
|
||
regcon.mvar = 0;
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
regcon.used = usedsave;
|
||
if (e->Eoper == OPvar)
|
||
{ symbol *s = e->EV.sp.Vsym;
|
||
|
||
if (s->Sfl == FLreg && s->Sregm != mAX)
|
||
*retsym = s;
|
||
}
|
||
}
|
||
}
|
||
else
|
||
{
|
||
case BCret:
|
||
case BCexit:
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
}
|
||
bl->Bcode = c;
|
||
if (retregs == mST0)
|
||
{ assert(stackused == 1);
|
||
pop87(); // account for return value
|
||
}
|
||
else if (retregs == mST01)
|
||
{ assert(stackused == 2);
|
||
pop87();
|
||
pop87(); // account for return value
|
||
}
|
||
if (bl->BC == BCexit && config.flags4 & CFG4optimized)
|
||
mfuncreg = mfuncregsave;
|
||
if (MARS || usednteh & NTEH_try)
|
||
{ block *bt;
|
||
|
||
bt = bl;
|
||
while ((bt = bt->Btry) != NULL)
|
||
{ block *bf;
|
||
|
||
bf = list_block(list_next(bt->Bsucc));
|
||
#if MARS
|
||
// Only look at try-finally blocks
|
||
if (bf->BC == BCjcatch)
|
||
{
|
||
continue;
|
||
}
|
||
#endif
|
||
if (config.flags2 & CFG2seh)
|
||
{
|
||
if (bt->Bscope_index == 0)
|
||
{
|
||
// call __finally
|
||
code *cs;
|
||
code *cr;
|
||
|
||
c = cat(c,nteh_gensindex(-1));
|
||
gensaverestore(retregs,&cs,&cr);
|
||
cs = genc(cs,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bf->Bsucc));
|
||
bl->Bcode = cat3(c,cs,cr);
|
||
}
|
||
else
|
||
bl->Bcode = cat(c,nteh_unwind(retregs,~0));
|
||
break;
|
||
}
|
||
else
|
||
{
|
||
// call __finally
|
||
code *cs;
|
||
code *cr;
|
||
int nalign = 0;
|
||
|
||
gensaverestore(retregs,&cs,&cr);
|
||
if (STACKALIGN == 16)
|
||
{ int npush = (numbitsset(retregs) + 1) * REGSIZE;
|
||
if (npush & (STACKALIGN - 1))
|
||
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
|
||
cs = genc2(cs,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
}
|
||
// CALL bf->Bsucc
|
||
cs = genc(cs,0xE8,0,0,0,FLblock,(targ_size_t)list_block(bf->Bsucc));
|
||
if (nalign)
|
||
{ cs = genc2(cs,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
bl->Bcode = c = cat3(c,cs,cr);
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
|
||
#if SCPP || MARS
|
||
case BCasm:
|
||
assert(!e);
|
||
// Mark destroyed registers
|
||
assert(!c);
|
||
c = cat(c,getregs(iasm_regs(bl)));
|
||
if (bl->Bsucc)
|
||
{ nextb = list_block(bl->Bsucc);
|
||
if (!bl->Bnext)
|
||
goto L2;
|
||
if (nextb != bl->Bnext &&
|
||
bl->Bnext &&
|
||
!(bl->Bnext->BC == BCgoto &&
|
||
!bl->Bnext->Belem &&
|
||
nextb == list_block(bl->Bnext->Bsucc)))
|
||
{ code *cl;
|
||
|
||
// See if already have JMP at end of block
|
||
cl = code_last(bl->Bcode);
|
||
if (!cl || cl->Iop != JMP)
|
||
goto L2; // add JMP at end of block
|
||
}
|
||
}
|
||
break;
|
||
#endif
|
||
default:
|
||
#ifdef DEBUG
|
||
printf("bl->BC = %d\n",bl->BC);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
}
|
||
|
||
/*******************************
|
||
* Generate code for blocks ending in a switch statement.
|
||
* Take BCswitch and decide on
|
||
* BCifthen use if - then code
|
||
* BCjmptab index into jump table
|
||
* BCswitch search table for match
|
||
*/
|
||
|
||
void doswitch(block *b)
|
||
{ code *cc,*c,*ce;
|
||
regm_t retregs;
|
||
unsigned ncases,n,reg,reg2,rm;
|
||
targ_llong vmax,vmin,val;
|
||
targ_llong *p;
|
||
list_t bl;
|
||
elem *e;
|
||
|
||
tym_t tys;
|
||
int sz;
|
||
unsigned char dword;
|
||
unsigned char mswsame;
|
||
#if LONGLONG
|
||
targ_ulong msw;
|
||
#else
|
||
unsigned msw;
|
||
#endif
|
||
|
||
e = b->Belem;
|
||
elem_debug(e);
|
||
cc = docommas(&e);
|
||
cgstate.stackclean++;
|
||
tys = tybasic(e->Ety);
|
||
sz = tysize[tys];
|
||
dword = (sz == 2 * REGSIZE);
|
||
mswsame = 1; // assume all msw's are the same
|
||
p = b->BS.Bswitch; /* pointer to case data */
|
||
assert(p);
|
||
ncases = *p++; /* number of cases */
|
||
|
||
vmax = MINLL; // smallest possible llong
|
||
vmin = MAXLL; // largest possible llong
|
||
for (n = 0; n < ncases; n++) // find max and min case values
|
||
{ val = *p++;
|
||
if (val > vmax) vmax = val;
|
||
if (val < vmin) vmin = val;
|
||
if (REGSIZE == 2)
|
||
{
|
||
unsigned short ms = (val >> 16) & 0xFFFF;
|
||
if (n == 0)
|
||
msw = ms;
|
||
else if (msw != ms)
|
||
mswsame = 0;
|
||
}
|
||
else // REGSIZE == 4
|
||
{
|
||
targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
|
||
if (n == 0)
|
||
msw = ms;
|
||
else if (msw != ms)
|
||
mswsame = 0;
|
||
}
|
||
}
|
||
p -= ncases;
|
||
//dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);
|
||
|
||
if (I64)
|
||
{ // For now, just generate basic if-then sequence to get us running
|
||
retregs = ALLREGS;
|
||
b->BC = BCifthen;
|
||
c = scodelem(e,&retregs,0,TRUE);
|
||
assert(!dword); // 128 bit switches not supported
|
||
reg = findreg(retregs); // reg that result is in
|
||
bl = b->Bsucc;
|
||
for (n = 0; n < ncases; n++)
|
||
{ code *cx;
|
||
val = *p;
|
||
if (sz == 4)
|
||
cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,val
|
||
else if (sz == 8)
|
||
{
|
||
if (val == (int)val) // if val is a 64 bit value sign-extended from 32 bits
|
||
{
|
||
cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,value32
|
||
cx->Irex |= REX_W; // 64 bit operand
|
||
}
|
||
else
|
||
{ unsigned sreg;
|
||
// MOV sreg,value64
|
||
cx = regwithvalue(CNIL, ALLREGS & ~mask[reg], val, &sreg, 64);
|
||
cx = genregs(cx,0x3B,reg,sreg); // CMP reg,sreg
|
||
code_orrex(cx, REX_W);
|
||
}
|
||
}
|
||
else
|
||
assert(0);
|
||
bl = list_next(bl);
|
||
genjmp(cx,JE,FLblock,list_block(bl)); // JE caseaddr
|
||
c = cat(c,cx);
|
||
p++;
|
||
}
|
||
if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */
|
||
c = cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc)));
|
||
ce = NULL;
|
||
}
|
||
// Need to do research on MACHOBJ to see about better methods
|
||
else if (MACHOBJ || ncases <= 3)
|
||
{ // generate if-then sequence
|
||
retregs = ALLREGS;
|
||
L1:
|
||
b->BC = BCifthen;
|
||
c = scodelem(e,&retregs,0,TRUE);
|
||
if (dword)
|
||
{ reg = findreglsw(retregs);
|
||
reg2 = findregmsw(retregs);
|
||
}
|
||
else
|
||
reg = findreg(retregs); /* reg that result is in */
|
||
bl = b->Bsucc;
|
||
if (dword && mswsame)
|
||
{ /* CMP reg2,MSW */
|
||
c = genc2(c,0x81,modregrm(3,7,reg2),msw);
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
for (n = 0; n < ncases; n++)
|
||
{ code *cnext = CNIL;
|
||
/* CMP reg,casevalue */
|
||
c = cat(c,ce = genc2(CNIL,0x81,modregrm(3,7,reg),(targ_int)*p));
|
||
if (dword && !mswsame)
|
||
{
|
||
cnext = gennop(CNIL);
|
||
genjmp(ce,JNE,FLcode,(block *) cnext);
|
||
genc2(ce,0x81,modregrm(3,7,reg2),MSREG(*p));
|
||
}
|
||
bl = list_next(bl);
|
||
/* JE caseaddr */
|
||
genjmp(ce,JE,FLblock,list_block(bl));
|
||
c = cat(c,cnext);
|
||
p++;
|
||
}
|
||
if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */
|
||
c = cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc)));
|
||
ce = NULL;
|
||
}
|
||
#if TARGET_WINDOS // try and find relocation to support this
|
||
else if ((targ_ullong)(vmax - vmin) <= ncases * 2) // then use jump table
|
||
{ int modify;
|
||
|
||
b->BC = BCjmptab;
|
||
retregs = IDXREGS;
|
||
if (dword)
|
||
retregs |= mMSW;
|
||
modify = (vmin || !I32);
|
||
c = scodelem(e,&retregs,0,!modify);
|
||
reg = findreg(retregs & IDXREGS); /* reg that result is in */
|
||
if (dword)
|
||
reg2 = findregmsw(retregs);
|
||
if (modify)
|
||
{
|
||
assert(!(retregs & regcon.mvar));
|
||
c = cat(c,getregs(retregs));
|
||
}
|
||
if (vmin) /* if there is a minimum */
|
||
{
|
||
c = genc2(c,0x81,modregrm(3,5,reg),vmin); /* SUB reg,vmin */
|
||
if (dword)
|
||
{ genc2(c,0x81,modregrm(3,3,reg2),MSREG(vmin)); // SBB reg2,vmin
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
}
|
||
else if (dword)
|
||
{ c = gentstreg(c,reg2); // TEST reg2,reg2
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
if (vmax - vmin != REGMASK) /* if there is a maximum */
|
||
{ /* CMP reg,vmax-vmin */
|
||
c = genc2(c,0x81,modregrm(3,7,reg),vmax-vmin);
|
||
genjmp(c,JA,FLblock,list_block(b->Bsucc)); /* JA default */
|
||
}
|
||
if (!I32)
|
||
c = gen2(c,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */
|
||
if (I32)
|
||
{
|
||
ce = genc1(CNIL,0xFF,modregrm(0,4,4),FLswitch,0); /* JMP [CS:]disp[idxreg*4] */
|
||
ce->Isib = modregrm(2,reg,5);
|
||
}
|
||
else
|
||
{ rm = getaddrmode(retregs) | modregrm(0,4,0);
|
||
ce = genc1(CNIL,0xFF,rm,FLswitch,0); /* JMP [CS:]disp[idxreg] */
|
||
}
|
||
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
|
||
ce->Iflags |= flags; // segment override
|
||
ce->IEV1.Vswitch = b;
|
||
b->Btablesize = (int) (vmax - vmin + 1) * tysize[TYnptr];
|
||
}
|
||
#endif
|
||
else /* else use switch table (BCswitch) */
|
||
{ targ_size_t disp;
|
||
int mod;
|
||
code *esw;
|
||
code *ct;
|
||
|
||
retregs = mAX; /* SCASW requires AX */
|
||
if (dword)
|
||
retregs |= mDX;
|
||
else if (ncases <= 6 || config.flags4 & CFG4speed)
|
||
goto L1;
|
||
c = scodelem(e,&retregs,0,TRUE);
|
||
if (dword && mswsame)
|
||
{ /* CMP DX,MSW */
|
||
c = genc2(c,0x81,modregrm(3,7,DX),msw);
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
ce = getregs(mCX|mDI);
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic)
|
||
{ // Add in GOT
|
||
code *cx;
|
||
code *cgot;
|
||
|
||
ce = cat(ce, getregs(mDX));
|
||
cx = genc2(NULL,CALL,0,0); // CALL L1
|
||
gen1(cx, 0x58 + DI); // L1: POP EDI
|
||
|
||
// ADD EDI,_GLOBAL_OFFSET_TABLE_+3
|
||
symbol *gotsym = elfobj_getGOTsym();
|
||
cgot = gencs(CNIL,0x81,modregrm(3,0,DI),FLextern,gotsym);
|
||
cgot->Iflags = CFoff;
|
||
cgot->IEVoffset2 = 3;
|
||
|
||
makeitextern(gotsym);
|
||
|
||
genmovreg(cgot, DX, DI); // MOV EDX, EDI
|
||
// ADD EDI,offset of switch table
|
||
esw = gencs(CNIL,0x81,modregrm(3,0,DI),FLswitch,NULL);
|
||
esw->IEV2.Vswitch = b;
|
||
esw = cat3(cx, cgot, esw);
|
||
}
|
||
else
|
||
#endif
|
||
{
|
||
// MOV DI,offset of switch table
|
||
esw = gencs(CNIL,0xC7,modregrm(3,0,DI),FLswitch,NULL);
|
||
esw->IEV2.Vswitch = b;
|
||
}
|
||
ce = cat(ce,esw);
|
||
movregconst(ce,CX,ncases,0); /* MOV CX,ncases */
|
||
|
||
/* The switch table will be accessed through ES:DI.
|
||
* Therefore, load ES with proper segment value.
|
||
*/
|
||
if (config.flags3 & CFG3eseqds)
|
||
{ assert(!(config.flags & CFGromable));
|
||
ce = cat(ce,getregs(mCX)); // allocate CX
|
||
}
|
||
else
|
||
{
|
||
ce = cat(ce,getregs(mES|mCX)); // allocate ES and CX
|
||
gen1(ce,(config.flags & CFGromable) ? 0x0E : 0x1E); // PUSH CS/DS
|
||
gen1(ce,0x07); // POP ES
|
||
}
|
||
|
||
disp = (ncases - 1) * intsize; /* displacement to jump table */
|
||
if (dword && !mswsame)
|
||
{ code *cloop;
|
||
|
||
/* Build the following:
|
||
L1: SCASW
|
||
JNE L2
|
||
CMP DX,[CS:]disp[DI]
|
||
L2: LOOPNE L1
|
||
*/
|
||
|
||
mod = (disp > 127) ? 2 : 1; /* displacement size */
|
||
cloop = genc2(CNIL,0xE0,0,-7 - mod -
|
||
((config.flags & CFGromable) ? 1 : 0)); /* LOOPNE scasw */
|
||
ce = gen1(ce,0xAF); /* SCASW */
|
||
code_orflag(ce,CFtarg2); // target of jump
|
||
genjmp(ce,JNE,FLcode,(block *) cloop); /* JNE loop */
|
||
/* CMP DX,[CS:]disp[DI] */
|
||
ct = genc1(CNIL,0x39,modregrm(mod,DX,5),FLconst,disp);
|
||
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
|
||
ct->Iflags |= flags; // possible seg override
|
||
ce = cat3(ce,ct,cloop);
|
||
disp += ncases * intsize; /* skip over msw table */
|
||
}
|
||
else
|
||
{
|
||
ce = gen1(ce,0xF2); /* REPNE */
|
||
gen1(ce,0xAF); /* SCASW */
|
||
}
|
||
genjmp(ce,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
mod = (disp > 127) ? 2 : 1; /* 1 or 2 byte displacement */
|
||
if (config.flags & CFGromable)
|
||
gen1(ce,SEGCS); /* table is in code segment */
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic)
|
||
{ // ADD EDX,(ncases-1)*2[EDI]
|
||
ct = genc1(CNIL,0x03,modregrm(mod,DX,7),FLconst,disp);
|
||
// JMP EDX
|
||
gen2(ct,0xFF,modregrm(3,4,DX));
|
||
}
|
||
else
|
||
#endif
|
||
{ // JMP (ncases-1)*2[DI]
|
||
ct = genc1(CNIL,0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
|
||
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
|
||
ct->Iflags |= flags;
|
||
}
|
||
ce = cat(ce,ct);
|
||
b->Btablesize = disp + intsize + ncases * tysize[TYnptr];
|
||
}
|
||
b->Bcode = cat3(cc,c,ce);
|
||
//assert(b->Bcode);
|
||
cgstate.stackclean--;
|
||
}
|
||
|
||
/******************************
|
||
* Output data block for a jump table (BCjmptab).
|
||
* The 'holes' in the table get filled with the
|
||
* default label.
|
||
*/
|
||
|
||
void outjmptab(block *b)
|
||
{
|
||
unsigned ncases,n;
|
||
targ_llong u,vmin,vmax,val,*p;
|
||
targ_size_t alignbytes,def,targ,*poffset;
|
||
int jmpseg;
|
||
|
||
poffset = (config.flags & CFGromable) ? &Coffset : &JMPOFF;
|
||
p = b->BS.Bswitch; /* pointer to case data */
|
||
ncases = *p++; /* number of cases */
|
||
vmax = MINLL; // smallest possible llong
|
||
vmin = MAXLL; // largest possible llong
|
||
for (n = 0; n < ncases; n++) /* find min case value */
|
||
{ val = p[n];
|
||
if (val > vmax) vmax = val;
|
||
if (val < vmin) vmin = val;
|
||
}
|
||
jmpseg = (config.flags & CFGromable) ? cseg : JMPSEG;
|
||
|
||
/* Any alignment bytes necessary */
|
||
alignbytes = align(0,*poffset) - *poffset;
|
||
obj_lidata(jmpseg,*poffset,alignbytes);
|
||
|
||
def = list_block(b->Bsucc)->Boffset; /* default address */
|
||
assert(vmin <= vmax);
|
||
for (u = vmin; ; u++)
|
||
{ targ = def; /* default */
|
||
for (n = 0; n < ncases; n++)
|
||
{ if (p[n] == u)
|
||
{ targ = list_block(list_nth(b->Bsucc,n + 1))->Boffset;
|
||
break;
|
||
}
|
||
}
|
||
reftocodseg(jmpseg,*poffset,targ);
|
||
*poffset += tysize[TYnptr];
|
||
if (u == vmax) /* for case that (vmax == ~0) */
|
||
break;
|
||
}
|
||
}
|
||
|
||
/******************************
|
||
* Output data block for a switch table.
|
||
* Two consecutive tables, the first is the case value table, the
|
||
* second is the address table.
|
||
*/
|
||
|
||
void outswitab(block *b)
|
||
{ unsigned ncases,n;
|
||
targ_llong *p;
|
||
targ_size_t val;
|
||
targ_size_t alignbytes,*poffset;
|
||
int seg; /* target segment for table */
|
||
list_t bl;
|
||
unsigned sz;
|
||
targ_size_t offset;
|
||
|
||
//printf("outswitab()\n");
|
||
p = b->BS.Bswitch; /* pointer to case data */
|
||
ncases = *p++; /* number of cases */
|
||
|
||
if (config.flags & CFGromable)
|
||
{ poffset = &Coffset;
|
||
assert(cseg == CODE);
|
||
seg = cseg;
|
||
}
|
||
else
|
||
{
|
||
poffset = &JMPOFF;
|
||
seg = JMPSEG;
|
||
}
|
||
offset = *poffset;
|
||
alignbytes = align(0,*poffset) - *poffset;
|
||
obj_lidata(seg,*poffset,alignbytes); /* any alignment bytes necessary */
|
||
assert(*poffset == offset + alignbytes);
|
||
|
||
sz = intsize;
|
||
for (n = 0; n < ncases; n++) /* send out value table */
|
||
{
|
||
//printf("\tcase %d, offset = x%x\n", n, *poffset);
|
||
#if OMFOBJ
|
||
*poffset +=
|
||
#endif
|
||
obj_bytes(seg,*poffset,sz,p);
|
||
p++;
|
||
}
|
||
offset += alignbytes + sz * ncases;
|
||
assert(*poffset == offset);
|
||
|
||
if (b->Btablesize == ncases * (REGSIZE * 2 + tysize[TYnptr]))
|
||
{
|
||
/* Send out MSW table */
|
||
p -= ncases;
|
||
for (n = 0; n < ncases; n++)
|
||
{ val = MSREG(*p);
|
||
p++;
|
||
#if OMFOBJ
|
||
*poffset +=
|
||
#endif
|
||
obj_bytes(seg,*poffset,REGSIZE,&val);
|
||
}
|
||
offset += REGSIZE * ncases;
|
||
assert(*poffset == offset);
|
||
}
|
||
|
||
bl = b->Bsucc;
|
||
for (n = 0; n < ncases; n++) /* send out address table */
|
||
{ bl = list_next(bl);
|
||
reftocodseg(seg,*poffset,list_block(bl)->Boffset);
|
||
*poffset += tysize[TYnptr];
|
||
}
|
||
assert(*poffset == offset + ncases * tysize[TYnptr]);
|
||
}
|
||
|
||
/*****************************
|
||
* Return a jump opcode relevant to the elem for a JMP TRUE.
|
||
*/
|
||
|
||
int jmpopcode(elem *e)
|
||
{ tym_t tym;
|
||
int zero,i,jp,op;
|
||
static const char jops[][2][6] =
|
||
{ /* <= > < >= == != <=0 >0 <0 >=0 ==0 !=0 */
|
||
{ {JLE,JG ,JL ,JGE,JE ,JNE},{JLE,JG ,JS ,JNS,JE ,JNE} }, /* signed */
|
||
{ {JBE,JA ,JB ,JAE,JE ,JNE},{JE ,JNE,JB ,JAE,JE ,JNE} }, /* unsigned */
|
||
#if 0
|
||
{ {JLE,JG ,JL ,JGE,JE ,JNE},{JLE,JG ,JL ,JGE,JE ,JNE} }, /* real */
|
||
{ {JBE,JA ,JB ,JAE,JE ,JNE},{JBE,JA ,JB ,JAE,JE ,JNE} }, /* 8087 */
|
||
{ {JA ,JBE,JAE,JB ,JE ,JNE},{JBE,JA ,JB ,JAE,JE ,JNE} }, /* 8087 R */
|
||
#endif
|
||
};
|
||
|
||
#define XP (JP << 8)
|
||
#define XNP (JNP << 8)
|
||
static const unsigned jfops[1][26] =
|
||
/* le gt lt ge eqeq ne unord lg leg ule ul uge */
|
||
{
|
||
{ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP, JNE,JNP, JBE,JC,XP|JAE,
|
||
|
||
/* ug ue ngt nge nlt nle ord nlg nleg nule nul nuge nug nue */
|
||
XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP, JA, JNC,XNP|JB, XNP|JBE,JNE }, /* 8087 */
|
||
};
|
||
|
||
assert(e);
|
||
while (e->Eoper == OPcomma ||
|
||
/* The !EOP(e->E1) is to line up with the case in cdeq() where */
|
||
/* we decide if mPSW is passed on when evaluating E2 or not. */
|
||
(e->Eoper == OPeq && !EOP(e->E1)))
|
||
e = e->E2; /* right operand determines it */
|
||
|
||
op = e->Eoper;
|
||
if (e->Ecount != e->Ecomsub) // comsubs just get Z bit set
|
||
return JNE;
|
||
if (!OTrel(op)) // not relational operator
|
||
{
|
||
tym_t tymx = tybasic(e->Ety);
|
||
if (tyfloating(tymx) && config.inline8087 &&
|
||
(tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
|
||
tymx == TYcdouble || tymx == TYcfloat ||
|
||
op == OPind))
|
||
{
|
||
return XP|JNE;
|
||
}
|
||
return (op >= OPbt && op <= OPbts) ? JC : JNE;
|
||
}
|
||
|
||
if (e->E2->Eoper == OPconst)
|
||
zero = !boolres(e->E2);
|
||
else
|
||
zero = 0;
|
||
|
||
tym = e->E1->Ety;
|
||
if (tyfloating(tym))
|
||
#if 1
|
||
{ i = 0;
|
||
if (config.inline8087)
|
||
{ i = 1;
|
||
|
||
#if 1
|
||
#define NOSAHF (I64 || config.fpxmmregs)
|
||
if (rel_exception(op) || config.flags4 & CFG4fastfloat)
|
||
{
|
||
if (zero)
|
||
{
|
||
if (NOSAHF)
|
||
op = swaprel(op);
|
||
}
|
||
else if (NOSAHF)
|
||
op = swaprel(op);
|
||
else if (cmporder87(e->E2))
|
||
op = swaprel(op);
|
||
else
|
||
;
|
||
}
|
||
else
|
||
{
|
||
if (zero && config.target_cpu < TARGET_80386)
|
||
;
|
||
else
|
||
op = swaprel(op);
|
||
}
|
||
#else
|
||
if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
|
||
op = swaprel(op);
|
||
else if (!zero &&
|
||
(cmporder87(e->E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
|
||
/* compare is reversed */
|
||
op = swaprel(op);
|
||
#endif
|
||
}
|
||
jp = jfops[0][op - OPle];
|
||
goto L1;
|
||
}
|
||
#else
|
||
i = (config.inline8087) ? (3 + cmporder87(e->E2)) : 2;
|
||
#endif
|
||
else if (tyuns(tym) || tyuns(e->E2->Ety))
|
||
i = 1;
|
||
else if (tyintegral(tym) || typtr(tym))
|
||
i = 0;
|
||
else
|
||
{
|
||
#if DEBUG
|
||
elem_print(e);
|
||
WRTYxx(tym);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
|
||
jp = jops[i][zero][op - OPle]; /* table starts with OPle */
|
||
L1:
|
||
#if DEBUG
|
||
if ((jp & 0xF0) != 0x70)
|
||
WROP(op),
|
||
printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp);
|
||
#endif
|
||
assert((jp & 0xF0) == 0x70);
|
||
return jp;
|
||
}
|
||
|
||
/**********************************
|
||
* Append code to *pc which validates pointer described by
|
||
* addressing mode in *pcs. Modify addressing mode in *pcs.
|
||
* Input:
|
||
* keepmsk mask of registers we must not destroy or use
|
||
* if (keepmsk & RMstore), this will be only a store operation
|
||
* into the lvalue
|
||
*/
|
||
|
||
void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk)
{   code *c;
    code *cs2;
    unsigned char rm;
    unsigned reg;
    unsigned flagsave;
    unsigned opsave;
    regm_t idxregs;
    regm_t tosave;
    regm_t used;
    int i;

    /* Overview of what is generated and appended to *pc:
     *      [LEA reg,EA]            only if the EA is not already a plain register
     *      [PUSH segreg]           16 bit models only
     *      PUSH reg
     *      CALL rtlsym[RTLSYM_PTRCHK]
     * and *pcs is rewritten to address 0[reg].
     */

    assert(!I64);
    if (!I16 && pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        return;         // not designed to deal with 48 bit far pointers

    c = *pc;

    rm = pcs->Irm;
    assert(!(rm & 0x40));       // no disp8 or reg addressing modes

    // If the addressing mode is already a register
    reg = rm & 7;
    if (I16)
    {   static const unsigned char imode[8] = { BP,BP,BP,BP,SI,DI,BP,BX };

        reg = imode[reg];               // convert [SI] to SI, etc.
    }
    idxregs = mask[reg];
    if ((rm & 0x80 && (pcs->IFL1 != FLoffset || pcs->IEV1.Vuns)) ||
        !(idxregs & ALLREGS)
       )
    {
        // Load the offset into a register, so we can push the address
        idxregs = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
        assert(idxregs);
        c = cat(c,allocreg(&idxregs,&reg,TYoffset));

        // Temporarily turn *pcs into an LEA to materialize the address,
        // then restore its opcode and flags.
        opsave = pcs->Iop;
        flagsave = pcs->Iflags;
        pcs->Iop = 0x8D;
        pcs->Irm |= modregrm(0,reg,0);
        pcs->Iflags &= ~(CFopsize | CFss | CFes | CFcs);        // no prefix bytes needed
        c = gen(c,pcs);                 // LEA reg,EA

        pcs->Iflags = flagsave;
        pcs->Iop = opsave;
    }

    // registers destroyed by the function call
    //used = (mBP | ALLREGS | mES) & ~fregsaved;
    used = 0;                           // much less code generated this way

    // With used == 0 nothing needs saving, so the loop below emits
    // no PUSH/POP pairs; it is retained for the alternative 'used' above.
    cs2 = CNIL;
    tosave = used & (keepmsk | idxregs);
    for (i = 0; tosave; i++)
    {   regm_t mi = mask[i];

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            int push,pop;

            stackchanged = 1;
            if (i == ES)
            {   push = 0x06;
                pop = 0x07;
            }
            else
            {   push = 0x50 + i;
                pop = push | 8;
            }
            c = gen1(c,push);                   // PUSH i
            cs2 = cat(gen1(CNIL,pop),cs2);      // POP i (prepended: reverse order)
            tosave &= ~mi;
        }
    }

    // For 16 bit models, push a far pointer
    if (I16)
    {   int segreg;

        // Pick the PUSH opcode matching the segment override on *pcs
        switch (pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        {   case CFes:  segreg = 0x06;  break;
            case CFss:  segreg = 0x16;  break;
            case CFcs:  segreg = 0x0E;  break;
            case 0:     segreg = 0x1E;  break;  // DS
            default:
                assert(0);
        }

        // See if we should default to SS:
        // (Happens when BP is part of the addressing mode)
        if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
            rm & 2 && (rm & 7) != 7)
        {   segreg = 0x16;
            if (config.wflags & WFssneds)
                pcs->Iflags |= CFss;    // because BP won't be there anymore
        }
        c = gen1(c,segreg);             // PUSH segreg
    }

    c = gen1(c,0x50 + reg);             // PUSH reg

    // Rewrite the addressing mode in *pcs so it is just 0[reg]
    setaddrmode(pcs, idxregs);
    pcs->IFL1 = FLoffset;
    pcs->IEV1.Vuns = 0;

    // Call the validation function
    {
        makeitextern(rtlsym[RTLSYM_PTRCHK]);

        used &= ~(keepmsk | idxregs);           // regs destroyed by this exercise
        c = cat(c,getregs(used));
        // CALL __ptrchk
        // (c is non-NULL here because of the PUSH above, so the
        // instruction is appended in place)
        gencs(c,(LARGECODE) ? 0x9A : CALL,0,FLfunc,rtlsym[RTLSYM_PTRCHK]);
    }

    *pc = cat(c,cs2);
}
|
||
|
||
|
||
|
||
/***********************************
|
||
* Determine if BP can be used as a general purpose register.
|
||
* Note parallels between this routine and prolog().
|
||
* Returns:
|
||
* 0 can't be used, needed for frame
|
||
* mBP can be used
|
||
*/
|
||
|
||
regm_t cod3_useBP()
|
||
{
|
||
tym_t tym;
|
||
tym_t tyf;
|
||
|
||
// Note that DOSX memory model cannot use EBP as a general purpose
|
||
// register, as SS != DS.
|
||
if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
|
||
goto Lcant;
|
||
|
||
if (anyiasm)
|
||
goto Lcant;
|
||
|
||
tyf = funcsym_p->ty();
|
||
if (tyf & mTYnaked) // if no prolog/epilog for function
|
||
goto Lcant;
|
||
|
||
if (funcsym_p->Sfunc->Fflags3 & Ffakeeh)
|
||
{
|
||
goto Lcant; // need consistent stack frame
|
||
}
|
||
|
||
tym = tybasic(tyf);
|
||
if (tym == TYifunc)
|
||
goto Lcant;
|
||
|
||
stackoffsets(0);
|
||
localsize = Aoffset; // an estimate only
|
||
// if (localsize)
|
||
{
|
||
if (!(config.flags4 & CFG4speed) ||
|
||
config.target_cpu < TARGET_Pentium ||
|
||
tyfarfunc(tym) ||
|
||
config.flags & CFGstack ||
|
||
localsize >= 0x100 || // arbitrary value < 0x1000
|
||
(usednteh & ~NTEHjmonitor) ||
|
||
usedalloca
|
||
)
|
||
goto Lcant;
|
||
}
|
||
Lcan:
|
||
return mBP;
|
||
|
||
Lcant:
|
||
return 0;
|
||
}
|
||
|
||
/***************************************
|
||
* Gen code for OPframeptr
|
||
*/
|
||
|
||
code *cdframeptr(elem *e, regm_t *pretregs)
|
||
{
|
||
unsigned reg;
|
||
code cs;
|
||
|
||
regm_t retregs = *pretregs & allregs;
|
||
if (!retregs)
|
||
retregs = allregs;
|
||
code *cg = allocreg(&retregs, ®, TYint);
|
||
|
||
cs.Iop = ESCAPE | ESCframeptr;
|
||
cs.Iflags = 0;
|
||
cs.Irex = 0;
|
||
cs.Irm = reg;
|
||
cg = gen(cg,&cs);
|
||
|
||
return cat(cg,fixresult(e,retregs,pretregs));
|
||
}
|
||
|
||
/***************************************
|
||
* Gen code for load of _GLOBAL_OFFSET_TABLE_.
|
||
* This value gets cached in the local variable 'localgot'.
|
||
*/
|
||
|
||
code *cdgot(elem *e, regm_t *pretregs)
|
||
{
|
||
#if TARGET_OSX
|
||
regm_t retregs;
|
||
unsigned reg;
|
||
code *c;
|
||
|
||
retregs = *pretregs & allregs;
|
||
if (!retregs)
|
||
retregs = allregs;
|
||
c = allocreg(&retregs, ®, TYnptr);
|
||
|
||
c = genc(c,CALL,0,0,0,FLgot,0); // CALL L1
|
||
gen1(c, 0x58 + reg); // L1: POP reg
|
||
|
||
return cat(c,fixresult(e,retregs,pretregs));
|
||
#elif TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
regm_t retregs;
|
||
unsigned reg;
|
||
code *c;
|
||
code *cgot;
|
||
|
||
retregs = *pretregs & allregs;
|
||
if (!retregs)
|
||
retregs = allregs;
|
||
c = allocreg(&retregs, ®, TYnptr);
|
||
|
||
c = genc2(c,CALL,0,0); // CALL L1
|
||
gen1(c, 0x58 + reg); // L1: POP reg
|
||
|
||
// ADD reg,_GLOBAL_OFFSET_TABLE_+3
|
||
symbol *gotsym = elfobj_getGOTsym();
|
||
cgot = gencs(CNIL,0x81,modregrm(3,0,reg),FLextern,gotsym);
|
||
/* Because the 2:3 offset from L1: is hardcoded,
|
||
* this sequence of instructions must not
|
||
* have any instructions in between,
|
||
* so set CFvolatile to prevent the scheduler from rearranging it.
|
||
*/
|
||
cgot->Iflags = CFoff | CFvolatile;
|
||
cgot->IEVoffset2 = (reg == AX) ? 2 : 3;
|
||
|
||
makeitextern(gotsym);
|
||
return cat3(c,cgot,fixresult(e,retregs,pretregs));
|
||
#else
|
||
assert(0);
|
||
return NULL;
|
||
#endif
|
||
}
|
||
|
||
/**************************************************
|
||
* Load contents of localgot into EBX.
|
||
*/
|
||
|
||
code *load_localgot()
|
||
{
|
||
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic && I32)
|
||
{
|
||
if (localgot)
|
||
{
|
||
localgot->Sflags &= ~GTregcand; // because this hack doesn't work with reg allocator
|
||
elem *e = el_var(localgot);
|
||
regm_t retregs = mBX;
|
||
code *c = codelem(e,&retregs,FALSE);
|
||
el_free(e);
|
||
return c;
|
||
}
|
||
else
|
||
{
|
||
elem *e = el_long(TYnptr, 0);
|
||
e->Eoper = OPgot;
|
||
regm_t retregs = mBX;
|
||
code *c = codelem(e,&retregs,FALSE);
|
||
el_free(e);
|
||
return c;
|
||
}
|
||
}
|
||
#endif
|
||
return NULL;
|
||
}
|
||
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
/*****************************
 * Store name into buffer p as a length-prefixed string.
 * Layout written:
 *      length <= 255:  [len] name...
 *      length >  255:  [0xFF] [0] [len low byte] [len high byte] name...
 * No terminating 0 is stored. p must have room for
 * strlen(name) + ONS_OHD bytes.
 * Returns:
 *      # of bytes stored
 */

#define ONS_OHD 4               // max # of extra bytes added by obj_namestring()

STATIC int obj_namestring(char *p,const char *name)
{   unsigned len;

    len = strlen(name);
    if (len > 255)
    {
        /* Write the 16 bit length one byte at a time (low byte first)
         * instead of storing through a (short *) cast: the cast violates
         * strict aliasing and made the result depend on host byte order.
         */
        p[0] = (char)0xFF;
        p[1] = 0;
        p[2] = (char)(len & 0xFF);
        p[3] = (char)((len >> 8) & 0xFF);
        memcpy(p + ONS_OHD,name,len);
        len += ONS_OHD;
    }
    else
    {   p[0] = (char)len;
        memcpy(p + 1,name,len);
        len++;
    }
    return len;
}
|
||
#endif
|
||
|
||
/* Append a register-to-register instruction (mod field 3) with opcode op,
 * using dstreg in the reg field and srcreg in the r/m field.
 */
code *genregs(code *c,unsigned op,unsigned dstreg,unsigned srcreg)
{
    const unsigned modrm = modregxrmx(3,dstreg,srcreg);

    return gen2(c,op,modrm);
}
|
||
|
||
/* Append "TEST t,t" to c and mark the instruction with CFpsw.
 * Returns the resulting code list.
 */
code *gentstreg(code *c,unsigned t)
{
    code *result = gen2(c,0x85,modregxrmx(3,t,t));      // TEST t,t

    code_orflag(result,CFpsw);
    return result;
}
|
||
|
||
/* Append "PUSH reg" to c.
 * The low 3 bits of reg select the opcode (0x50..0x57); if bit 3 of reg
 * is set, REX_B is OR'ed into the instruction.
 */
code *genpush(code *c, unsigned reg)
{
    const unsigned lowbits = reg & 7;
    code *result = gen1(c, 0x50 + lowbits);

    if (reg & 8)
    {
        code_orrex(result, REX_B);
    }
    return result;
}
|
||
|
||
/* Append "POP reg" to c.
 * The low 3 bits of reg select the opcode (0x58..0x5F); if bit 3 of reg
 * is set, REX_B is OR'ed into the instruction.
 */
code *genpop(code *c, unsigned reg)
{
    const unsigned lowbits = reg & 7;
    code *result = gen1(c, 0x58 + lowbits);

    if (reg & 8)
    {
        code_orrex(result, REX_B);
    }
    return result;
}
|
||
|
||
/**************************
|
||
* Generate a MOV to save a register to a stack slot
|
||
*/
|
||
code *gensavereg(unsigned& reg, targ_uns slot)
|
||
{
|
||
// MOV i[BP],reg
|
||
unsigned op = 0x89; // normal mov
|
||
if (reg == ES)
|
||
{ reg = 0; // the real reg number
|
||
op = 0x8C; // segment reg mov
|
||
}
|
||
code *c = genc1(NULL,op,modregxrm(2, reg, BPRM),FLcs,slot);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
|
||
return c;
|
||
}
|
||
|
||
/**************************
|
||
* Generate a MOV to,from register instruction.
|
||
* Smart enough to dump redundant register moves, and segment
|
||
* register moves.
|
||
*/
|
||
|
||
code *genmovreg(code *c,unsigned to,unsigned from)
|
||
{
|
||
#if DEBUG
|
||
if (to > ES || from > ES)
|
||
printf("genmovreg(c = %p, to = %d, from = %d)\n",c,to,from);
|
||
#endif
|
||
assert(to <= ES && from <= ES);
|
||
if (to != from)
|
||
{
|
||
if (to == ES)
|
||
c = genregs(c,0x8E,0,from);
|
||
else if (from == ES)
|
||
c = genregs(c,0x8C,0,to);
|
||
else
|
||
c = genregs(c,0x89,from,to);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
return c;
|
||
}
|
||
|
||
/***************************************
|
||
* Generate immediate multiply instruction for r1=r2*imm.
|
||
* Optimize it into LEA's if we can.
|
||
*/
|
||
|
||
code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm)
|
||
{ code cs;
|
||
|
||
// These optimizations should probably be put into pinholeopt()
|
||
switch (imm)
|
||
{ case 1:
|
||
c = genmovreg(c,r1,r2);
|
||
break;
|
||
case 5:
|
||
cs.Iop = LEA;
|
||
cs.Iflags = 0;
|
||
cs.Irex = 0;
|
||
buildEA(&cs,r2,r2,4,0);
|
||
cs.orReg(r1);
|
||
c = gen(c,&cs);
|
||
break;
|
||
default:
|
||
c = genc2(c,0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm
|
||
break;
|
||
}
|
||
return c;
|
||
}
|
||
|
||
/******************************
|
||
* Load CX with the value of _AHSHIFT.
|
||
*/
|
||
|
||
code *genshift(code *c)
|
||
{
|
||
#if SCPP && TX86
|
||
code *c1;
|
||
|
||
// Set up ahshift to trick ourselves into giving the right fixup,
|
||
// which must be seg-relative, external frame, external target.
|
||
c1 = gencs(CNIL,0xC7,modregrm(3,0,CX),FLfunc,rtlsym[RTLSYM_AHSHIFT]);
|
||
c1->Iflags |= CFoff;
|
||
return cat(c,c1);
|
||
#else
|
||
assert(0);
|
||
return 0;
|
||
#endif
|
||
}
|
||
|
||
/******************************
|
||
* Move constant value into reg.
|
||
* Take advantage of existing values in registers.
|
||
* If flags & mPSW
|
||
* set flags based on result
|
||
* Else if flags & 8
|
||
* do not disturb flags
|
||
* Else
|
||
* don't care about flags
|
||
* If flags & 1 then byte move
|
||
* If flags & 2 then short move (for I32 and I64)
|
||
* If flags & 4 then don't disturb unused portion of register
|
||
* If flags & 16 then reg is a byte register AL..BH
|
||
* If flags & 64 (0x40) then 64 bit move (I64 only)
|
||
* Returns:
|
||
* code (if any) generated
|
||
*/
|
||
|
||
code *movregconst(code *c,unsigned reg,targ_size_t value,regm_t flags)
|
||
{ unsigned r;
|
||
regm_t mreg;
|
||
|
||
//printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask[reg]), value, value, flags);
|
||
#define genclrreg(a,r) genregs(a,0x31,r,r)
|
||
|
||
regm_t regm = regcon.immed.mval & mask[reg];
|
||
targ_size_t regv = regcon.immed.value[reg];
|
||
|
||
if (flags & 1) // 8 bits
|
||
{
|
||
value &= 0xFF;
|
||
regm &= BYTEREGS;
|
||
|
||
// If we already have the right value in the right register
|
||
if (regm && (regv & 0xFF) == value)
|
||
goto L2;
|
||
|
||
if (flags & 16 && reg & 4 && // if an H byte register
|
||
regcon.immed.mval & mask[reg & 3] &&
|
||
(((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
|
||
goto L2;
|
||
|
||
/* Avoid byte register loads on Pentium Pro and Pentium II
|
||
* to avoid dependency stalls.
|
||
*/
|
||
if (config.flags4 & CFG4speed &&
|
||
config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
|
||
goto L3;
|
||
|
||
// See if another register has the right value
|
||
r = 0;
|
||
for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
|
||
{
|
||
if (mreg & 1)
|
||
{
|
||
if ((regcon.immed.value[r] & 0xFF) == value)
|
||
{ c = genregs(c,0x8A,reg,r); // MOV regL,rL
|
||
if (I64 && reg >= 4 || r >= 4)
|
||
code_orrex(c, REX);
|
||
goto L2;
|
||
}
|
||
if (!(I64 && reg >= 4) &&
|
||
r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
|
||
{ c = genregs(c,0x8A,reg,r | 4); // MOV regL,rH
|
||
goto L2;
|
||
}
|
||
}
|
||
r++;
|
||
}
|
||
|
||
if (value == 0 && !(flags & 8))
|
||
{
|
||
if (!(flags & 4) && // if we can set the whole register
|
||
!(flags & 16 && reg & 4)) // and reg is not an H register
|
||
{ c = genregs(c,0x31,reg,reg); // XOR reg,reg
|
||
regimmed_set(reg,value);
|
||
regv = 0;
|
||
}
|
||
else
|
||
c = genregs(c,0x30,reg,reg); // XOR regL,regL
|
||
flags &= ~mPSW; // flags already set by XOR
|
||
}
|
||
else
|
||
{ c = genc2(c,0xC6,modregrmx(3,0,reg),value); /* MOV regL,value */
|
||
if (reg >= 4 && I64)
|
||
{
|
||
code_orrex(c, REX);
|
||
}
|
||
}
|
||
L2:
|
||
if (flags & mPSW)
|
||
genregs(c,0x84,reg,reg); // TEST regL,regL
|
||
|
||
if (regm)
|
||
// Set just the 'L' part of the register value
|
||
regimmed_set(reg,(regv & ~(targ_size_t)0xFF) | value);
|
||
else if (flags & 16 && reg & 4 && regcon.immed.mval & mask[reg & 3])
|
||
// Set just the 'H' part of the register value
|
||
regimmed_set((reg & 3),(regv & ~(targ_size_t)0xFF00) | (value << 8));
|
||
return c;
|
||
}
|
||
L3:
|
||
if (I16)
|
||
value = (targ_short) value; /* sign-extend MSW */
|
||
else if (I32)
|
||
value = (targ_int) value;
|
||
|
||
if (!I16 && flags & 2) // load 16 bit value
|
||
{
|
||
value &= 0xFFFF;
|
||
if (value == 0)
|
||
goto L1;
|
||
else
|
||
{
|
||
if (flags & mPSW)
|
||
goto L1;
|
||
code *c1 = genc2(CNIL,0xC7,modregrmx(3,0,reg),value); // MOV reg,value
|
||
c1->Iflags |= CFopsize; // yes, even for I64
|
||
c = cat(c,c1);
|
||
if (regm)
|
||
// High bits of register are not affected by 16 bit load
|
||
regimmed_set(reg,(regv & ~(targ_size_t)0xFFFF) | value);
|
||
}
|
||
return c;
|
||
}
|
||
L1:
|
||
|
||
/* If we already have the right value in the right register */
|
||
if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
|
||
{ if (flags & mPSW)
|
||
c = gentstreg(c,reg);
|
||
}
|
||
else if (flags & 64 && regm && regv == value)
|
||
{ // Look at the full 64 bits
|
||
if (flags & mPSW)
|
||
{
|
||
c = gentstreg(c,reg);
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if (flags & mPSW)
|
||
{
|
||
switch (value)
|
||
{ case 0:
|
||
c = genclrreg(c,reg);
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
break;
|
||
case 1:
|
||
if (I64)
|
||
goto L4;
|
||
c = genclrreg(c,reg);
|
||
goto inc;
|
||
case -1:
|
||
if (I64)
|
||
goto L4;
|
||
c = genclrreg(c,reg);
|
||
goto dec;
|
||
default:
|
||
L4:
|
||
if (flags & 64)
|
||
{
|
||
c = genc2(c,0xC7,(REX_W << 16) | modregrmx(3,0,reg),value); // MOV reg,value64
|
||
gentstreg(c,reg);
|
||
code_orrex(c, REX_W);
|
||
}
|
||
else
|
||
{ c = genc2(c,0xC7,modregrmx(3,0,reg),value); /* MOV reg,value */
|
||
gentstreg(c,reg);
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
/* Look for single byte conversion */
|
||
if (regcon.immed.mval & mAX)
|
||
{
|
||
if (I32)
|
||
{ if (reg == AX && value == (targ_short) regv)
|
||
{ c = gen1(c,0x98); /* CWDE */
|
||
goto done;
|
||
}
|
||
if (reg == DX &&
|
||
value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
|
||
!(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
|
||
)
|
||
{ c = gen1(c,0x99); /* CDQ */
|
||
goto done;
|
||
}
|
||
}
|
||
else if (I16)
|
||
{
|
||
if (reg == AX &&
|
||
(targ_short) value == (signed char) regv)
|
||
{ c = gen1(c,0x98); /* CBW */
|
||
goto done;
|
||
}
|
||
|
||
if (reg == DX &&
|
||
(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? (targ_short) 0xFFFF : (targ_short) 0) &&
|
||
!(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
|
||
)
|
||
{ c = gen1(c,0x99); /* CWD */
|
||
goto done;
|
||
}
|
||
}
|
||
}
|
||
if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
|
||
{ c = genclrreg(c,reg); // CLR reg
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
goto done;
|
||
}
|
||
|
||
if (!I64 && regm && !(flags & 8))
|
||
{ if (regv + 1 == value ||
|
||
/* Catch case of (0xFFFF+1 == 0) for 16 bit compiles */
|
||
(I16 && (targ_short)(regv + 1) == (targ_short)value))
|
||
{
|
||
inc:
|
||
c = gen1(c,0x40 + reg); /* INC reg */
|
||
goto done;
|
||
}
|
||
if (regv - 1 == value)
|
||
{
|
||
dec:
|
||
c = gen1(c,0x48 + reg); /* DEC reg */
|
||
goto done;
|
||
}
|
||
}
|
||
|
||
/* See if another register has the right value */
|
||
r = 0;
|
||
for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
|
||
{
|
||
#ifdef DEBUG
|
||
assert(!I16 || regcon.immed.value[r] == (targ_short)regcon.immed.value[r]);
|
||
#endif
|
||
if (mreg & 1 && regcon.immed.value[r] == value)
|
||
{ c = genmovreg(c,reg,r);
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
goto done;
|
||
}
|
||
r++;
|
||
}
|
||
|
||
if (value == 0 && !(flags & 8))
|
||
{ c = genclrreg(c,reg); // CLR reg
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
else
|
||
{ /* See if we can just load a byte */
|
||
if (regm & BYTEREGS &&
|
||
!(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
|
||
)
|
||
{
|
||
if ((regv & ~(targ_size_t)0xFF) == (value & ~(targ_size_t)0xFF))
|
||
{ c = movregconst(c,reg,value,(flags & 8) |4|1); // load regL
|
||
return c;
|
||
}
|
||
if (regm & (mAX|mBX|mCX|mDX) &&
|
||
(regv & ~(targ_size_t)0xFF00) == (value & ~(targ_size_t)0xFF00) &&
|
||
!I64)
|
||
{ c = movregconst(c,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
|
||
return c;
|
||
}
|
||
}
|
||
if (flags & 64)
|
||
c = genc2(c,0xC7,(REX_W << 16) | modregrmx(3,0,reg),value); // MOV reg,value64
|
||
else
|
||
c = genc2(c,0xC7,modregrmx(3,0,reg),value); // MOV reg,value
|
||
}
|
||
}
|
||
done:
|
||
regimmed_set(reg,value);
|
||
}
|
||
return c;
|
||
}
|
||
|
||
/**************************
|
||
* Generate a jump instruction.
|
||
*/
|
||
|
||
code *genjmp(code *c,unsigned op,unsigned fltarg,block *targ)
|
||
{ code cs;
|
||
code *cj;
|
||
code *cnop;
|
||
|
||
cs.Iop = op & 0xFF;
|
||
cs.Iflags = 0;
|
||
cs.Irex = 0;
|
||
if (op != JMP && op != 0xE8) // if not already long branch
|
||
cs.Iflags = CFjmp16; /* assume long branch for op = 0x7x */
|
||
cs.IFL2 = fltarg; /* FLblock (or FLcode) */
|
||
cs.IEV2.Vblock = targ; /* target block (or code) */
|
||
if (fltarg == FLcode)
|
||
((code *)targ)->Iflags |= CFtarg;
|
||
|
||
if (config.flags4 & CFG4fastfloat) // if fast floating point
|
||
return gen(c,&cs);
|
||
|
||
cj = gen(CNIL,&cs);
|
||
switch (op & 0xFF00) /* look at second jump opcode */
|
||
{
|
||
/* The JP and JNP come from floating point comparisons */
|
||
case JP << 8:
|
||
cs.Iop = JP;
|
||
gen(cj,&cs);
|
||
break;
|
||
case JNP << 8:
|
||
/* Do a JP around the jump instruction */
|
||
cnop = gennop(CNIL);
|
||
c = genjmp(c,JP,FLcode,(block *) cnop);
|
||
cat(cj,cnop);
|
||
break;
|
||
case 1 << 8: /* toggled no jump */
|
||
case 0 << 8:
|
||
break;
|
||
default:
|
||
#ifdef DEBUG
|
||
printf("jop = x%x\n",op);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
return cat(c,cj);
|
||
}
|
||
|
||
/*******************************
|
||
* Generate code for a function start.
|
||
* Input:
|
||
* Coffset address of start of code
|
||
* Output:
|
||
* Coffset adjusted for size of code generated
|
||
* EBPtoESP
|
||
* hasframe
|
||
* BPoff
|
||
*/
|
||
|
||
code *prolog()
|
||
{
|
||
SYMIDX si;
|
||
unsigned reg;
|
||
char enter;
|
||
unsigned Foffset;
|
||
unsigned xlocalsize; // amount to subtract from ESP to make room for locals
|
||
unsigned pushallocreg;
|
||
char guessneedframe;
|
||
regm_t namedargs = 0;
|
||
|
||
//printf("cod3.prolog(), needframe = %d, Aalign = %d\n", needframe, Aalign);
|
||
debugx(debugw && printf("funcstart()\n"));
|
||
regcon.immed.mval = 0; /* no values in registers yet */
|
||
EBPtoESP = -REGSIZE;
|
||
hasframe = 0;
|
||
char pushds = 0;
|
||
BPoff = 0;
|
||
code *c = CNIL;
|
||
int pushalloc = 0;
|
||
tym_t tyf = funcsym_p->ty();
|
||
tym_t tym = tybasic(tyf);
|
||
unsigned farfunc = tyfarfunc(tym);
|
||
pushallocreg = (tyf == TYmfunc) ? CX : AX;
|
||
if (config.flags & CFGalwaysframe || funcsym_p->Sfunc->Fflags3 & Ffakeeh)
|
||
needframe = 1;
|
||
|
||
Lagain:
|
||
guessneedframe = needframe;
|
||
// if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & ~NTEHjmonitor))
|
||
// usednteh |= NTEHpassthru;
|
||
|
||
/* Compute BP offsets for variables on stack.
|
||
* The organization is:
|
||
* Poff parameters
|
||
* seg of return addr (if far function)
|
||
* IP of return addr
|
||
* BP-> caller's BP
|
||
* DS (if Windows prolog/epilog)
|
||
* exception handling context symbol
|
||
* Aoff autos and regs
|
||
* regsave.off any saved registers
|
||
* Foff floating register
|
||
* AAoff alloca temporary
|
||
* CSoff common subs
|
||
* NDPoff any 8087 saved registers
|
||
* Toff temporaries
|
||
* monitor context record
|
||
* any saved registers
|
||
*/
|
||
|
||
if (tym == TYifunc)
|
||
Poff = 26;
|
||
else if (I64)
|
||
Poff = 16;
|
||
else if (I32)
|
||
Poff = farfunc ? 12 : 8;
|
||
else
|
||
Poff = farfunc ? 6 : 4;
|
||
|
||
Aoff = 0;
|
||
#if NTEXCEPTIONS == 2
|
||
Aoff -= nteh_contextsym_size();
|
||
#if MARS
|
||
if (funcsym_p->Sfunc->Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
|
||
Aoff -= 5 * 4;
|
||
#endif
|
||
#endif
|
||
Aoff = -align(0,-Aoff + Aoffset);
|
||
|
||
regsave.off = Aoff - align(0,regsave.top);
|
||
Foffset = floatreg ? (config.fpxmmregs ? 16 : DOUBLESIZE) : 0;
|
||
Foff = regsave.off - align(0,Foffset);
|
||
assert(usedalloca != 1);
|
||
AAoff = usedalloca ? (Foff - REGSIZE) : Foff;
|
||
CSoff = AAoff - align(0,cstop * REGSIZE);
|
||
NDPoff = CSoff - align(0,NDP::savetop * NDPSAVESIZE);
|
||
Toff = NDPoff - align(0,Toffset);
|
||
|
||
if (Foffset > Aalign)
|
||
Aalign = Foffset;
|
||
if (Aalign > REGSIZE)
|
||
{
|
||
// Adjust Aoff so that it is Aalign byte aligned, assuming that
|
||
// before function parameters were pushed the stack was
|
||
// Aalign byte aligned
|
||
targ_size_t psize = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
|
||
int sz = psize + -Aoff + Poff + (needframe ? 0 : REGSIZE);
|
||
if (sz & (Aalign - 1))
|
||
{ int adj = Aalign - (sz & (Aalign - 1));
|
||
Aoff -= adj;
|
||
regsave.off -= adj;
|
||
Foff -= adj;
|
||
AAoff -= adj;
|
||
CSoff -= adj;
|
||
NDPoff -= adj;
|
||
Toff -= adj;
|
||
}
|
||
}
|
||
|
||
localsize = -Toff;
|
||
|
||
regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving
|
||
int npush = 0; // number of registers that need saving
|
||
for (regm_t x = topush; x; x >>= 1)
|
||
npush += x & 1;
|
||
|
||
// Keep the stack aligned by 8 for any subsequent function calls
|
||
if (!I16 && calledafunc &&
|
||
(STACKALIGN == 16 || config.flags4 & CFG4stackalign))
|
||
{
|
||
//printf("npush = %d Poff = x%x needframe = %d localsize = x%x\n", npush, Poff, needframe, localsize);
|
||
|
||
int sz = Poff + (needframe ? 0 : -REGSIZE) + localsize + npush * REGSIZE;
|
||
if (STACKALIGN == 16)
|
||
{
|
||
if (sz & (8|4))
|
||
localsize += STACKALIGN - (sz & (8|4));
|
||
}
|
||
else if (sz & 4)
|
||
localsize += 4;
|
||
}
|
||
|
||
//printf("Foff x%02x Aoff x%02x Toff x%02x NDPoff x%02x CSoff x%02x Poff x%02x localsize x%02x\n",
|
||
//(int)Foff,(int)Aoff,(int)Toff,(int)NDPoff,(int)CSoff,(int)Poff,(int)localsize);
|
||
|
||
xlocalsize = localsize;
|
||
|
||
if (tyf & mTYnaked) // if no prolog/epilog for function
|
||
{
|
||
hasframe = 1;
|
||
return NULL;
|
||
}
|
||
|
||
if (tym == TYifunc)
|
||
{ static unsigned char ops2[] = { 0x60,0x1E,0x06,0 };
|
||
static unsigned char ops0[] = { 0x50,0x51,0x52,0x53,
|
||
0x54,0x55,0x56,0x57,
|
||
0x1E,0x06,0 };
|
||
|
||
unsigned char *p;
|
||
|
||
p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0;
|
||
do
|
||
c = gen1(c,*p);
|
||
while (*++p);
|
||
c = genregs(c,0x8B,BP,SP); // MOV BP,SP
|
||
if (localsize)
|
||
c = genc2(c,0x81,modregrm(3,5,SP),localsize); // SUB SP,localsize
|
||
tyf |= mTYloadds;
|
||
hasframe = 1;
|
||
goto Lcont;
|
||
}
|
||
|
||
/* Determine if we need BP set up */
|
||
if (config.flags & CFGalwaysframe)
|
||
needframe = 1;
|
||
else
|
||
{
|
||
if (localsize)
|
||
{
|
||
if (I16 ||
|
||
!(config.flags4 & CFG4speed) ||
|
||
config.target_cpu < TARGET_Pentium ||
|
||
farfunc ||
|
||
config.flags & CFGstack ||
|
||
xlocalsize >= 0x1000 ||
|
||
(usednteh & ~NTEHjmonitor) ||
|
||
anyiasm ||
|
||
usedalloca
|
||
)
|
||
needframe = 1;
|
||
}
|
||
if (refparam && (anyiasm || I16))
|
||
needframe = 1;
|
||
}
|
||
|
||
if (needframe)
|
||
{ assert(mfuncreg & mBP); // shouldn't have used mBP
|
||
|
||
if (!guessneedframe) // if guessed wrong
|
||
goto Lagain;
|
||
}
|
||
|
||
if (I16 && config.wflags & WFwindows && farfunc)
|
||
{ int wflags;
|
||
int segreg;
|
||
|
||
#if SCPP
|
||
// alloca() can't be because the 'special' parameter won't be at
|
||
// a known offset from BP.
|
||
if (usedalloca == 1)
|
||
synerr(EM_alloca_win); // alloca() can't be in Windows functions
|
||
#endif
|
||
|
||
wflags = config.wflags;
|
||
if (wflags & WFreduced && !(tyf & mTYexport))
|
||
{ // reduced prolog/epilog for non-exported functions
|
||
wflags &= ~(WFdgroup | WFds | WFss);
|
||
}
|
||
|
||
c = getregs(mAX);
|
||
assert(!c); /* should not have any value in AX */
|
||
|
||
switch (wflags & (WFdgroup | WFds | WFss))
|
||
{ case WFdgroup: // MOV AX,DGROUP
|
||
if (wflags & WFreduced)
|
||
tyf &= ~mTYloadds; // remove redundancy
|
||
c = genc(c,0xC7,modregrm(3,0,AX),0,0,FLdatseg,(targ_uns) 0);
|
||
c->Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg
|
||
break;
|
||
case WFss:
|
||
segreg = 2; // SS
|
||
goto Lmovax;
|
||
case WFds:
|
||
segreg = 3; // DS
|
||
Lmovax:
|
||
c = gen2(c,0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg
|
||
if (wflags & WFds)
|
||
gen1(c,0x90); // NOP
|
||
break;
|
||
case 0:
|
||
break;
|
||
default:
|
||
#ifdef DEBUG
|
||
printf("config.wflags = x%x\n",config.wflags);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
if (wflags & WFincbp)
|
||
c = gen1(c,0x40 + BP); // INC BP
|
||
c = gen1(c,0x50 + BP); // PUSH BP
|
||
genregs(c,0x8B,BP,SP); // MOV BP,SP
|
||
if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
|
||
{ gen1(c,0x1E); // PUSH DS
|
||
pushds = TRUE;
|
||
BPoff = -REGSIZE;
|
||
}
|
||
if (wflags & (WFds | WFss | WFdgroup))
|
||
gen2(c,0x8E,modregrm(3,3,AX)); // MOV DS,AX
|
||
|
||
enter = FALSE; /* don't use ENTER instruction */
|
||
hasframe = 1; /* we have a stack frame */
|
||
}
|
||
else
|
||
if (needframe) // if variables or parameters
|
||
{
|
||
if (config.wflags & WFincbp && farfunc)
|
||
c = gen1(c,0x40 + BP); /* INC BP */
|
||
if (config.target_cpu < TARGET_80286 ||
|
||
config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_OSX64 | EX_FREEBSD | EX_FREEBSD64 | EX_SOLARIS | EX_SOLARIS64) ||
|
||
!localsize ||
|
||
config.flags & CFGstack ||
|
||
(xlocalsize >= 0x1000 && config.exe & EX_flat) ||
|
||
localsize >= 0x10000 ||
|
||
#if NTEXCEPTIONS == 2
|
||
(usednteh & ~NTEHjmonitor && (config.flags2 & CFG2seh)) ||
|
||
#endif
|
||
(config.target_cpu >= TARGET_80386 &&
|
||
config.flags4 & CFG4speed)
|
||
)
|
||
{
|
||
c = gen1(c,0x50 + BP); // PUSH BP
|
||
genregs(c,0x8B,BP,SP); // MOV BP,SP
|
||
if (I64)
|
||
code_orrex(c, REX_W); // MOV RBP,RSP
|
||
#if ELFOBJ || MACHOBJ
|
||
if (config.fulltypes)
|
||
// Don't reorder instructions, as dwarf CFA relies on it
|
||
code_orflag(c, CFvolatile);
|
||
#endif
|
||
enter = FALSE; /* do not use ENTER instruction */
|
||
#if NTEXCEPTIONS == 2
|
||
if (usednteh & ~NTEHjmonitor && (config.flags2 & CFG2seh))
|
||
{
|
||
code *ce = nteh_prolog();
|
||
c = cat(c,ce);
|
||
int sz = nteh_contextsym_size();
|
||
assert(sz != 0); // should be 5*4, not 0
|
||
xlocalsize -= sz; // sz is already subtracted from ESP
|
||
// by nteh_prolog()
|
||
}
|
||
#endif
|
||
#if ELFOBJ || MACHOBJ
|
||
if (config.fulltypes)
|
||
{ int off = I64 ? 16 : 8;
|
||
dwarf_CFA_set_loc(1); // address after PUSH EBP
|
||
dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
|
||
dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP]
|
||
dwarf_CFA_set_loc(3); // address after MOV EBP,ESP
|
||
// Yes, I know the parameter is 8 when we mean 0!
|
||
// But this gets the cfa register set to EBP correctly
|
||
dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP]
|
||
}
|
||
#endif
|
||
}
|
||
else
|
||
enter = TRUE;
|
||
hasframe = 1;
|
||
}
|
||
|
||
if (config.flags & CFGstack) /* if stack overflow check */
|
||
goto Ladjstack;
|
||
|
||
if (needframe) /* if variables or parameters */
|
||
{
|
||
if (xlocalsize) /* if any stack offset */
|
||
{
|
||
Ladjstack:
|
||
#if !TARGET_LINUX // seems that Linux doesn't need to fault in stack pages
|
||
if ((config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check
|
||
#if TARGET_WINDOS
|
||
|| (xlocalsize >= 0x1000 && config.exe & EX_flat)
|
||
#endif
|
||
)
|
||
{
|
||
if (I16)
|
||
{
|
||
// BUG: Won't work if parameter is passed in AX
|
||
c = movregconst(c,AX,xlocalsize,FALSE); // MOV AX,localsize
|
||
makeitextern(rtlsym[RTLSYM_CHKSTK]);
|
||
// CALL _chkstk
|
||
gencs(c,(LARGECODE) ? 0x9A : CALL,0,FLfunc,rtlsym[RTLSYM_CHKSTK]);
|
||
useregs((ALLREGS | mBP | mES) & ~rtlsym[RTLSYM_CHKSTK]->Sregsaved);
|
||
}
|
||
else
|
||
{
|
||
/* Watch out for 64 bit code where EDX is passed as a register parameter
|
||
*/
|
||
int reg = I64 ? R11 : DX; // scratch register
|
||
|
||
/* MOV EDX, xlocalsize/0x1000
|
||
* L1: SUB ESP, 0x1000
|
||
* TEST [ESP],ESP
|
||
* DEC EDX
|
||
* JNE L1
|
||
* SUB ESP, xlocalsize % 0x1000
|
||
*/
|
||
c = movregconst(c, reg, xlocalsize / 0x1000, FALSE);
|
||
code *csub = genc2(NULL,0x81,modregrm(3,5,SP),0x1000);
|
||
if (I64)
|
||
code_orrex(csub, REX_W);
|
||
code_orflag(csub, CFtarg2);
|
||
gen2sib(csub, 0x85, modregrm(0,SP,4),modregrm(0,4,SP));
|
||
if (I64)
|
||
{ gen2(csub, 0xFF, (REX_W << 16) | modregrmx(3,0,R11)); // DEC R11
|
||
genc2(csub,JNE,0,(targ_uns)-14);
|
||
}
|
||
else
|
||
{ gen1(csub, 0x48 + DX); // DEC EDX
|
||
genc2(csub,JNE,0,(targ_uns)-12);
|
||
}
|
||
regimmed_set(reg,0); // reg is now 0
|
||
genc2(csub,0x81,modregrm(3,5,SP),xlocalsize & 0xFFF);
|
||
if (I64)
|
||
code_orrex(csub, REX_W);
|
||
c = cat(c,csub);
|
||
useregs(mask[reg]);
|
||
}
|
||
}
|
||
else
|
||
#endif
|
||
{
|
||
if (enter)
|
||
{ // ENTER xlocalsize,0
|
||
c = genc(c,0xC8,0,FLconst,xlocalsize,FLconst,(targ_uns) 0);
|
||
#if ELFOBJ || MACHOBJ
|
||
assert(!config.fulltypes); // didn't emit Dwarf data
|
||
#endif
|
||
}
|
||
else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized)
|
||
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
// Do this to prevent an -x[EBP] to be moved in
|
||
// front of the push.
|
||
code_orflag(c,CFvolatile);
|
||
pushalloc = 1;
|
||
}
|
||
else
|
||
{ // SUB SP,xlocalsize
|
||
c = genc2(c,0x81,modregrm(3,5,SP),xlocalsize);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
|
||
if (usedalloca)
|
||
{
|
||
// Set up magic parameter for alloca()
|
||
// MOV -REGSIZE[BP],localsize - BPoff
|
||
//c = genc(c,0xC7,modregrm(2,0,BPRM),FLconst,-REGSIZE,FLconst,localsize - BPoff);
|
||
c = genc(c,0xC7,modregrm(2,0,BPRM),
|
||
FLconst,AAoff + BPoff,
|
||
FLconst,localsize - BPoff);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
else
|
||
assert(usedalloca == 0);
|
||
}
|
||
else if (xlocalsize)
|
||
{
|
||
assert(I32);
|
||
|
||
if (xlocalsize == REGSIZE)
|
||
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
pushalloc = 1;
|
||
}
|
||
else if (xlocalsize == 2 * REGSIZE)
|
||
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
pushalloc = 1;
|
||
}
|
||
else
|
||
{ // SUB ESP,xlocalsize
|
||
c = genc2(c,0x81,modregrm(3,5,SP),xlocalsize);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
BPoff += REGSIZE;
|
||
}
|
||
else
|
||
assert((localsize | usedalloca) == 0 || (usednteh & NTEHjmonitor));
|
||
EBPtoESP += xlocalsize;
|
||
|
||
/* The idea is to generate trace for all functions if -Nc is not thrown.
|
||
* If -Nc is thrown, generate trace only for global COMDATs, because those
|
||
* are relevant to the FUNCTIONS statement in the linker .DEF file.
|
||
* This same logic should be in epilog().
|
||
*/
|
||
if (config.flags & CFGtrace &&
|
||
(!(config.flags4 & CFG4allcomdat) ||
|
||
funcsym_p->Sclass == SCcomdat ||
|
||
funcsym_p->Sclass == SCglobal ||
|
||
(config.flags2 & CFG2comdat && SymInline(funcsym_p))
|
||
)
|
||
)
|
||
{
|
||
if (STACKALIGN == 16 && npush)
|
||
{ /* This could be avoided by moving the function call to after the
|
||
* registers are saved. But I don't remember why the call is here
|
||
* and not there.
|
||
*/
|
||
c = genc2(c,0x81,modregrm(3,5,SP),npush * REGSIZE); // SUB ESP,npush * REGSIZE
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
|
||
symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N];
|
||
makeitextern(s);
|
||
c = gencs(c,I16 ? 0x9A : CALL,0,FLfunc,s); // CALL _trace
|
||
if (!I16)
|
||
code_orflag(c,CFoff | CFselfrel);
|
||
/* Embedding the function name inline after the call works, but it
|
||
* makes disassembling the code annoying.
|
||
*/
|
||
#if ELFOBJ || MACHOBJ
|
||
size_t len = strlen(funcsym_p->Sident);
|
||
char *buffer = (char *)malloc(len + 4);
|
||
assert(buffer);
|
||
if (len <= 254)
|
||
{ buffer[0] = len;
|
||
memcpy(buffer + 1, funcsym_p->Sident, len);
|
||
len++;
|
||
}
|
||
else
|
||
{ buffer[0] = 0xFF;
|
||
buffer[1] = 0;
|
||
buffer[2] = len & 0xFF;
|
||
buffer[3] = len >> 8;
|
||
memcpy(buffer + 4, funcsym_p->Sident, len);
|
||
len += 4;
|
||
}
|
||
genasm(c, buffer, len); // append func name
|
||
free(buffer);
|
||
#else
|
||
char name[IDMAX+IDOHD+1];
|
||
size_t len = obj_mangle(funcsym_p,name);
|
||
assert(len < sizeof(name));
|
||
genasm(c,name,len); // append func name
|
||
#endif
|
||
if (STACKALIGN == 16 && npush)
|
||
{
|
||
c = genc2(c,0x81,modregrm(3,0,SP),npush * REGSIZE); // ADD ESP,npush * REGSIZE
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
useregs((ALLREGS | mBP | mES) & ~s->Sregsaved);
|
||
}
|
||
|
||
#if MARS
|
||
if (usednteh & NTEHjmonitor)
|
||
{ Symbol *sthis;
|
||
|
||
for (si = 0; 1; si++)
|
||
{ assert(si < globsym.top);
|
||
sthis = globsym.tab[si];
|
||
if (strcmp(sthis->Sident,"this") == 0)
|
||
break;
|
||
}
|
||
c = cat(c,nteh_monitor_prolog(sthis));
|
||
EBPtoESP += 3 * 4;
|
||
}
|
||
#endif
|
||
|
||
while (topush) /* while registers to push */
|
||
{ reg = findreg(topush);
|
||
topush &= ~mask[reg];
|
||
c = gen1(c,0x50 + (reg & 7));
|
||
if (reg & 8)
|
||
code_orrex(c, REX_B);
|
||
EBPtoESP += REGSIZE;
|
||
#if ELFOBJ || MACHOBJ
|
||
if (config.fulltypes)
|
||
{ // Emit debug_frame data giving location of saved register
|
||
// relative to 0[EBP]
|
||
pinholeopt(c, NULL);
|
||
dwarf_CFA_set_loc(calcblksize(c)); // address after PUSH reg
|
||
dwarf_CFA_offset(reg, -EBPtoESP - REGSIZE);
|
||
}
|
||
#endif
|
||
}
|
||
|
||
Lcont:
|
||
|
||
/* Determine if we need to reload DS */
|
||
if (tyf & mTYloadds)
|
||
{ code *c1;
|
||
|
||
if (!pushds) // if not already pushed
|
||
c = gen1(c,0x1E); // PUSH DS
|
||
c1 = genc(CNIL,0xC7,modregrm(3,0,AX),0,0,FLdatseg,(targ_uns) 0); /* MOV AX,DGROUP */
|
||
c1->Iflags ^= CFseg | CFoff; /* turn off CFoff, on CFseg */
|
||
c = cat(c,c1);
|
||
gen2(c,0x8E,modregrm(3,3,AX)); /* MOV DS,AX */
|
||
useregs(mAX);
|
||
}
|
||
|
||
if (tym == TYifunc)
|
||
c = gen1(c,0xFC); // CLD
|
||
|
||
#if NTEXCEPTIONS == 2
|
||
if (usednteh & NTEH_except)
|
||
c = cat(c,nteh_setsp(0x89)); // MOV __context[EBP].esp,ESP
|
||
#endif
|
||
|
||
// Load register parameters off of the stack. Do not use
|
||
// assignaddr(), as it will replace the stack reference with
|
||
// the register!
|
||
for (si = 0; si < globsym.top; si++)
|
||
{ symbol *s = globsym.tab[si];
|
||
code *c2;
|
||
unsigned sz = type_size(s->Stype);
|
||
|
||
if ((s->Sclass == SCregpar || s->Sclass == SCparameter) &&
|
||
s->Sfl == FLreg &&
|
||
(refparam
|
||
#if MARS
|
||
// This variable has been reference by a nested function
|
||
|| s->Stype->Tty & mTYvolatile
|
||
#endif
|
||
))
|
||
{
|
||
/* MOV reg,param[BP] */
|
||
//assert(refparam);
|
||
if (mask[s->Sreglsw] & XMMREGS)
|
||
{
|
||
unsigned op = xmmload(s->Stype->Tty); // MOVSS/D xreg,mem
|
||
unsigned xreg = s->Sreglsw - XMM0;
|
||
code *c2 = genc1(CNIL,op,modregxrm(2,xreg,BPRM),FLconst,Poff + s->Soffset);
|
||
if (!hasframe)
|
||
{ // Convert to ESP relative address rather than EBP
|
||
c2->Irm = modregxrm(2,xreg,4);
|
||
c2->Isib = modregrm(0,4,SP);
|
||
c2->IEVpointer1 += EBPtoESP;
|
||
}
|
||
c = cat(c,c2);
|
||
}
|
||
else
|
||
{
|
||
code *c2 = genc1(CNIL,0x8B ^ (sz == 1),
|
||
modregxrm(2,s->Sreglsw,BPRM),FLconst,Poff + s->Soffset);
|
||
if (!I16 && sz == SHORTSIZE)
|
||
c2->Iflags |= CFopsize; // operand size
|
||
if (I64 && sz >= REGSIZE)
|
||
c2->Irex |= REX_W;
|
||
if (!hasframe)
|
||
{ /* Convert to ESP relative address rather than EBP */
|
||
assert(!I16);
|
||
c2->Irm = modregxrm(2,s->Sreglsw,4);
|
||
c2->Isib = modregrm(0,4,SP);
|
||
c2->IEVpointer1 += EBPtoESP;
|
||
}
|
||
if (sz > REGSIZE)
|
||
{
|
||
code *c3 = genc1(CNIL,0x8B,
|
||
modregxrm(2,s->Sregmsw,BPRM),FLconst,Poff + s->Soffset + REGSIZE);
|
||
if (I64)
|
||
c3->Irex |= REX_W;
|
||
if (!hasframe)
|
||
{ /* Convert to ESP relative address rather than EBP */
|
||
assert(!I16);
|
||
c3->Irm = modregxrm(2,s->Sregmsw,4);
|
||
c3->Isib = modregrm(0,4,SP);
|
||
c3->IEVpointer1 += EBPtoESP;
|
||
}
|
||
c2 = cat(c2,c3);
|
||
}
|
||
c = cat(c,c2);
|
||
}
|
||
}
|
||
else if (s->Sclass == SCfastpar)
|
||
{ // Argument is passed in a register
|
||
unsigned preg = s->Spreg;
|
||
|
||
namedargs |= mask[preg];
|
||
|
||
if (s->Sfl == FLreg)
|
||
{ // MOV reg,preg
|
||
if (mask[preg] & XMMREGS)
|
||
{
|
||
unsigned op = xmmload(s->Stype->Tty); // MOVSS/D xreg,preg
|
||
unsigned xreg = s->Sreglsw - XMM0;
|
||
c = gen2(c,op,modregxrmx(3,xreg,preg - XMM0));
|
||
}
|
||
else
|
||
{
|
||
c = genmovreg(c,s->Sreglsw,preg);
|
||
if (I64 && sz == 8)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
else if (s->Sflags & SFLdead ||
|
||
(!anyiasm && !(s->Sflags & SFLread) && s->Sflags & SFLunambig &&
|
||
#if MARS
|
||
// This variable has been reference by a nested function
|
||
!(s->Stype->Tty & mTYvolatile) &&
|
||
#endif
|
||
(config.flags4 & CFG4optimized || !config.fulltypes)))
|
||
{
|
||
// Ignore it, as it is never referenced
|
||
;
|
||
}
|
||
else
|
||
{
|
||
targ_size_t offset = Aoff + BPoff + s->Soffset;
|
||
int op = 0x89; // MOV x[EBP],preg
|
||
if (preg >= XMM0 && preg <= XMM15)
|
||
{
|
||
op = xmmstore(s->Stype->Tty);
|
||
}
|
||
if (hasframe)
|
||
{
|
||
if (!(pushalloc && preg == pushallocreg))
|
||
{
|
||
// MOV x[EBP],preg
|
||
c2 = genc1(CNIL,op,
|
||
modregxrm(2,preg,BPRM),FLconst, offset);
|
||
if (preg >= XMM0 && preg <= XMM15)
|
||
{
|
||
}
|
||
else
|
||
{
|
||
//printf("%s Aoff = %d, BPoff = %d, Soffset = %d, sz = %d\n", s->Sident, (int)Aoff, (int)BPoff, (int)s->Soffset, (int)sz);
|
||
// if (offset & 2)
|
||
// c2->Iflags |= CFopsize;
|
||
if (I64 && sz == 8)
|
||
code_orrex(c2, REX_W);
|
||
}
|
||
c = cat(c, c2);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
offset += EBPtoESP;
|
||
if (!(pushalloc && preg == pushallocreg))
|
||
{
|
||
// MOV offset[ESP],preg
|
||
// BUG: byte size?
|
||
c2 = genc1(CNIL,op,
|
||
(modregrm(0,4,SP) << 8) |
|
||
modregxrm(2,preg,4),FLconst,offset);
|
||
if (preg >= XMM0 && preg <= XMM15)
|
||
{
|
||
}
|
||
else
|
||
{
|
||
if (I64 && sz == 8)
|
||
c2->Irex |= REX_W;
|
||
// if (offset & 2)
|
||
// c2->Iflags |= CFopsize;
|
||
}
|
||
c = cat(c,c2);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Load arguments passed in registers into the varargs save area
|
||
* so they can be accessed by va_arg().
|
||
*/
|
||
if (I64 && variadic(funcsym_p->Stype))
|
||
{
|
||
/* Look for __va_argsave
|
||
*/
|
||
symbol *sv = NULL;
|
||
for (SYMIDX si = 0; si < globsym.top; si++)
|
||
{ symbol *s = globsym.tab[si];
|
||
if (s->Sident[0] == '_' && strcmp(s->Sident, "__va_argsave") == 0)
|
||
{ sv = s;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (sv && !(sv->Sflags & SFLdead))
|
||
{
|
||
/* Generate code to move any arguments passed in registers into
|
||
* the stack variable __va_argsave,
|
||
* so we can reference it via pointers through va_arg().
|
||
* struct __va_argsave_t {
|
||
* size_t[6] regs;
|
||
* real[8] fpregs;
|
||
* uint offset_regs;
|
||
* uint offset_fpregs;
|
||
* void* stack_args;
|
||
* void* reg_args;
|
||
* }
|
||
* The MOVAPS instructions seg fault if data is not aligned on
|
||
* 16 bytes, so this gives us a nice check to ensure no mistakes.
|
||
MOV voff+0*8[RBP],EDI
|
||
MOV voff+1*8[RBP],ESI
|
||
MOV voff+2*8[RBP],RDX
|
||
MOV voff+3*8[RBP],RCX
|
||
MOV voff+4*8[RBP],R8
|
||
MOV voff+5*8[RBP],R9
|
||
MOVZX EAX,AL // AL = 0..8, # of XMM registers used
|
||
SHL EAX,2 // 4 bytes for each MOVAPS
|
||
LEA RDX,offset L2[RIP]
|
||
SUB RDX,RAX
|
||
LEA RAX,voff+6*8+0x7F[RBP]
|
||
JMP EDX
|
||
MOVAPS -0x0F[RAX],XMM7 // only save XMM registers if actually used
|
||
MOVAPS -0x1F[RAX],XMM6
|
||
MOVAPS -0x2F[RAX],XMM5
|
||
MOVAPS -0x3F[RAX],XMM4
|
||
MOVAPS -0x4F[RAX],XMM3
|
||
MOVAPS -0x5F[RAX],XMM2
|
||
MOVAPS -0x6F[RAX],XMM1
|
||
MOVAPS -0x7F[RAX],XMM0
|
||
L2:
|
||
MOV 1[RAX],offset_regs // set __va_argsave.offset_regs
|
||
MOV 5[RAX],offset_fpregs // set __va_argsave.offset_fpregs
|
||
LEA RDX, Poff+Poffset[RBP]
|
||
MOV 9[RAX],RDX // set __va_argsave.stack_args
|
||
SUB RAX,6*8+0x7F // point to start of __va_argsave
|
||
MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args
|
||
*/
|
||
targ_size_t voff = Aoff + BPoff + sv->Soffset; // EBP offset of start of sv
|
||
const int vregnum = 6;
|
||
const unsigned vsize = vregnum * 8 + 8 * 16;
|
||
code *cv = CNIL;
|
||
|
||
static unsigned char regs[vregnum] = { DI,SI,DX,CX,R8,R9 };
|
||
|
||
if (!hasframe)
|
||
voff += EBPtoESP;
|
||
for (int i = 0; i < vregnum; i++)
|
||
{
|
||
unsigned r = regs[i];
|
||
if (!(mask[r] & namedargs)) // named args are already dealt with
|
||
{ unsigned ea = (REX_W << 16) | modregxrm(2,r,BPRM);
|
||
if (!hasframe)
|
||
ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
|
||
cv = genc1(cv,0x89,ea,FLconst,voff + i*8);
|
||
}
|
||
}
|
||
|
||
cv = genregs(cv,0x0FB6,AX,AX); // MOVZX EAX,AL
|
||
genc2(cv,0xC1,modregrm(3,4,AX),2); // SHL EAX,2
|
||
int raxoff = voff+6*8+0x7F;
|
||
unsigned L2offset = (raxoff < -0x7F) ? 0x2C : 0x29;
|
||
if (!hasframe)
|
||
L2offset += 1; // +1 for sib byte
|
||
// LEA RDX,offset L2[RIP]
|
||
genc1(cv,0x8D,(REX_W << 16) | modregrm(0,DX,5),FLconst,L2offset);
|
||
genregs(cv,0x29,AX,DX); // SUB RDX,RAX
|
||
code_orrex(cv, REX_W);
|
||
// LEA RAX,voff+vsize-6*8-16+0x7F[RBP]
|
||
unsigned ea = (REX_W << 16) | modregrm(2,AX,BPRM);
|
||
if (!hasframe)
|
||
// add sib byte for [RSP] addressing
|
||
ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
|
||
genc1(cv,0x8D,ea,FLconst,raxoff);
|
||
gen2(cv,0xFF,modregrm(3,4,DX)); // JMP EDX
|
||
for (int i = 0; i < 8; i++)
|
||
{
|
||
// MOVAPS -15-16*i[RAX],XMM7-i
|
||
genc1(cv,0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
|
||
}
|
||
|
||
/* Compute offset_regs and offset_fpregs
|
||
*/
|
||
unsigned offset_regs = 0;
|
||
unsigned offset_fpregs = vregnum * 8;
|
||
for (int i = AX; i <= XMM7; i++)
|
||
{ regm_t m = mask[i];
|
||
if (m & namedargs)
|
||
{
|
||
if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
|
||
offset_regs += 8;
|
||
else if (m & XMMREGS)
|
||
offset_fpregs += 16;
|
||
namedargs &= ~m;
|
||
if (!namedargs)
|
||
break;
|
||
}
|
||
}
|
||
// MOV 1[RAX],offset_regs
|
||
genc(cv,0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);
|
||
|
||
// MOV 5[RAX],offset_fpregs
|
||
genc(cv,0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);
|
||
|
||
// LEA RDX, Poff+Poffset[RBP]
|
||
ea = modregrm(2,DX,BPRM);
|
||
if (!hasframe)
|
||
ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4);
|
||
Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
|
||
genc1(cv,0x8D,(REX_W << 16) | ea,FLconst,Poff + Poffset);
|
||
|
||
// MOV 9[RAX],RDX
|
||
genc1(cv,0x89,(REX_W << 16) | modregrm(2,DX,AX),FLconst,9);
|
||
|
||
// SUB RAX,6*8+0x7F // point to start of __va_argsave
|
||
genc2(cv,0x2D,0,6*8+0x7F);
|
||
code_orrex(cv, REX_W);
|
||
|
||
// MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args
|
||
genc1(cv,0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);
|
||
|
||
pinholeopt(cv, NULL);
|
||
useregs(mDX|mAX);
|
||
c = cat(c,cv);
|
||
}
|
||
}
|
||
|
||
#if 0 && TARGET_LINUX
|
||
if (gotref)
|
||
{ // position independent reference
|
||
c = cat(c, cod3_load_got());
|
||
}
|
||
#endif
|
||
|
||
return c;
|
||
}
|
||
|
||
/*******************************
|
||
* Generate and return function epilog.
|
||
* Output:
|
||
* retsize Size of function epilog
|
||
*/
|
||
|
||
static targ_size_t spoff;
|
||
|
||
void epilog(block *b)
|
||
{ code *c;
|
||
code *cr;
|
||
code *ce;
|
||
code *cpopds;
|
||
unsigned reg;
|
||
unsigned regx; // register that's not a return reg
|
||
regm_t topop,regm;
|
||
tym_t tyf,tym;
|
||
int op;
|
||
char farfunc;
|
||
targ_size_t xlocalsize = localsize;
|
||
|
||
c = CNIL;
|
||
ce = b->Bcode;
|
||
tyf = funcsym_p->ty();
|
||
tym = tybasic(tyf);
|
||
farfunc = tyfarfunc(tym);
|
||
if (!(b->Bflags & BFLepilog)) // if no epilog code
|
||
goto Lret; // just generate RET
|
||
regx = (b->BC == BCret) ? AX : CX;
|
||
|
||
spoff = 0;
|
||
retsize = 0;
|
||
|
||
if (tyf & mTYnaked) // if no prolog/epilog
|
||
return;
|
||
|
||
if (tym == TYifunc)
|
||
{ static unsigned char ops2[] = { 0x07,0x1F,0x61,0xCF,0 };
|
||
static unsigned char ops0[] = { 0x07,0x1F,0x5F,0x5E,
|
||
0x5D,0x5B,0x5B,0x5A,
|
||
0x59,0x58,0xCF,0 };
|
||
unsigned char *p;
|
||
|
||
c = genregs(c,0x8B,SP,BP); // MOV SP,BP
|
||
p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0;
|
||
do
|
||
gen1(c,*p);
|
||
while (*++p);
|
||
goto Lopt;
|
||
}
|
||
|
||
if (config.flags & CFGtrace &&
|
||
(!(config.flags4 & CFG4allcomdat) ||
|
||
funcsym_p->Sclass == SCcomdat ||
|
||
funcsym_p->Sclass == SCglobal ||
|
||
(config.flags2 & CFG2comdat && SymInline(funcsym_p))
|
||
)
|
||
)
|
||
{
|
||
symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N];
|
||
makeitextern(s);
|
||
c = gencs(c,I16 ? 0x9A : CALL,0,FLfunc,s); // CALLF _trace
|
||
if (!I16)
|
||
code_orflag(c,CFoff | CFselfrel);
|
||
useregs((ALLREGS | mBP | mES) & ~s->Sregsaved);
|
||
}
|
||
|
||
if (usednteh & ~NTEHjmonitor && (config.exe == EX_NT || MARS))
|
||
c = cat(c,nteh_epilog());
|
||
|
||
cpopds = CNIL;
|
||
if (tyf & mTYloadds)
|
||
{ cpopds = gen1(cpopds,0x1F); // POP DS
|
||
c = cat(c,cpopds);
|
||
spoff += intsize;
|
||
}
|
||
|
||
/* Pop all the general purpose registers saved on the stack
|
||
* by the prolog code. Remember to do them in the reverse
|
||
* order they were pushed.
|
||
*/
|
||
reg = I64 ? R15 : DI;
|
||
regm = 1 << reg;
|
||
topop = fregsaved & ~mfuncreg;
|
||
#ifdef DEBUG
|
||
if (topop & ~0xFFFF)
|
||
printf("fregsaved = x%x, mfuncreg = x%x\n",fregsaved,mfuncreg);
|
||
#endif
|
||
assert(!(topop & ~0xFFFF));
|
||
while (topop)
|
||
{ if (topop & regm)
|
||
{ c = gen1(c,0x58 + (reg & 7)); // POP reg
|
||
if (reg & 8)
|
||
code_orrex(c, REX_B);
|
||
topop &= ~regm;
|
||
spoff += REGSIZE;
|
||
}
|
||
regm >>= 1;
|
||
reg--;
|
||
}
|
||
|
||
#if MARS
|
||
if (usednteh & NTEHjmonitor)
|
||
{
|
||
regm_t retregs = 0;
|
||
if (b->BC == BCretexp)
|
||
retregs = regmask(b->Belem->Ety, tym);
|
||
code *cn = nteh_monitor_epilog(retregs);
|
||
c = cat(c,cn);
|
||
xlocalsize += 8;
|
||
}
|
||
#endif
|
||
|
||
if (config.wflags & WFwindows && farfunc)
|
||
{
|
||
int wflags = config.wflags;
|
||
if (wflags & WFreduced && !(tyf & mTYexport))
|
||
{ // reduced prolog/epilog for non-exported functions
|
||
wflags &= ~(WFdgroup | WFds | WFss);
|
||
if (!(wflags & WFsaveds))
|
||
goto L4;
|
||
}
|
||
|
||
if (localsize | usedalloca)
|
||
{
|
||
c = genc1(c,0x8D,modregrm(1,SP,6),FLconst,(targ_uns)-2); /* LEA SP,-2[BP] */
|
||
}
|
||
if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
|
||
{ if (cpopds)
|
||
cpopds->Iop = NOP; // don't need previous one
|
||
c = gen1(c,0x1F); // POP DS
|
||
}
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
if (config.wflags & WFincbp)
|
||
gen1(c,0x48 + BP); // DEC BP
|
||
assert(hasframe);
|
||
}
|
||
else
|
||
{
|
||
if (needframe || (xlocalsize && hasframe))
|
||
{
|
||
L4:
|
||
assert(hasframe);
|
||
if (xlocalsize | usedalloca)
|
||
{ if (config.target_cpu >= TARGET_80286 &&
|
||
!(config.target_cpu >= TARGET_80386 &&
|
||
config.flags4 & CFG4speed)
|
||
)
|
||
c = gen1(c,0xC9); // LEAVE
|
||
else if (0 && xlocalsize == REGSIZE && !usedalloca && I32)
|
||
{ // This doesn't work - I should figure out why
|
||
mfuncreg &= ~mask[regx];
|
||
c = gen1(c,0x58 + regx); // POP regx
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
}
|
||
else
|
||
{ c = genregs(c,0x8B,SP,BP); // MOV SP,BP
|
||
if (I64)
|
||
code_orrex(c, REX_W); // MOV RSP,RBP
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
}
|
||
}
|
||
else
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
if (config.wflags & WFincbp && farfunc)
|
||
gen1(c,0x48 + BP); // DEC BP
|
||
}
|
||
else if (xlocalsize == REGSIZE && (!I16 || b->BC == BCret))
|
||
{ mfuncreg &= ~mask[regx];
|
||
c = gen1(c,0x58 + regx); // POP regx
|
||
}
|
||
else if (xlocalsize)
|
||
{
|
||
c = genc2(c,0x81,modregrm(3,0,SP),xlocalsize); // ADD SP,xlocalsize
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
if (b->BC == BCret || b->BC == BCretexp)
|
||
{
|
||
Lret:
|
||
op = tyfarfunc(tym) ? 0xCA : 0xC2;
|
||
if (tym == TYhfunc)
|
||
{
|
||
c = genc2(c,0xC2,0,4); // RET 4
|
||
}
|
||
else if (!typfunc(tym) || // if caller cleans the stack
|
||
Poffset == 0) // or nothing pushed on the stack anyway
|
||
{ op++; // to a regular RET
|
||
c = gen1(c,op);
|
||
}
|
||
else
|
||
{ // Stack is always aligned on register size boundary
|
||
Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
|
||
c = genc2(c,op,0,Poffset); // RET Poffset
|
||
}
|
||
}
|
||
|
||
Lopt:
|
||
// If last instruction in ce is ADD SP,imm, and first instruction
|
||
// in c sets SP, we can dump the ADD.
|
||
cr = code_last(ce);
|
||
if (cr && c && !I64)
|
||
{
|
||
if (cr->Iop == 0x81 && cr->Irm == modregrm(3,0,SP)) // if ADD SP,imm
|
||
{
|
||
if (
|
||
c->Iop == 0xC9 || // LEAVE
|
||
(c->Iop == 0x8B && c->Irm == modregrm(3,SP,BP)) || // MOV SP,BP
|
||
(c->Iop == 0x8D && c->Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP]
|
||
)
|
||
cr->Iop = NOP;
|
||
else if (c->Iop == 0x58 + BP) // if POP BP
|
||
{ cr->Iop = 0x8B;
|
||
cr->Irm = modregrm(3,SP,BP); // MOV SP,BP
|
||
}
|
||
}
|
||
#if 0 // These optimizations don't work if the called function
|
||
// cleans off the stack.
|
||
else if (c->Iop == 0xC3 && cr->Iop == CALL) // CALL near
|
||
{ cr->Iop = 0xE9; // JMP near
|
||
c->Iop = NOP;
|
||
}
|
||
else if (c->Iop == 0xCB && cr->Iop == 0x9A) // CALL far
|
||
{ cr->Iop = 0xEA; // JMP far
|
||
c->Iop = NOP;
|
||
}
|
||
#endif
|
||
}
|
||
|
||
retsize += calcblksize(c); // compute size of function epilog
|
||
b->Bcode = cat(ce,c);
|
||
}
|
||
|
||
/*******************************
|
||
* Return offset of SP from BP.
|
||
*/
|
||
|
||
targ_size_t cod3_spoff()
|
||
{
|
||
return spoff + localsize;
|
||
}
|
||
|
||
/**********************************
|
||
* Load value of _GLOBAL_OFFSET_TABLE_ into EBX
|
||
*/
|
||
|
||
code *cod3_load_got()
|
||
{
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
code *c;
|
||
code *cgot;
|
||
|
||
c = genc2(NULL,CALL,0,0); // CALL L1
|
||
gen1(c, 0x58 + BX); // L1: POP EBX
|
||
|
||
// ADD EBX,_GLOBAL_OFFSET_TABLE_+3
|
||
symbol *gotsym = elfobj_getGOTsym();
|
||
cgot = gencs(CNIL,0x81,0xC3,FLextern,gotsym);
|
||
cgot->Iflags = CFoff;
|
||
cgot->IEVoffset2 = 3;
|
||
|
||
makeitextern(gotsym);
|
||
return cat(c,cgot);
|
||
#else
|
||
assert(0);
|
||
return NULL;
|
||
#endif
|
||
}
|
||
|
||
/**********************************
 * Generate code to move symbol s between its assigned register(s)
 * and its memory location.
 * Input:
 *      s       symbol; s->Sreglsw (and s->Sregmsw when the symbol is
 *              larger than REGSIZE) name the register(s) involved
 *      toreg   true:  load the register(s) from memory
 *              false: store the register(s) to memory
 * Returns:
 *      the generated code
 */
code* gen_spill_reg(Symbol* s, bool toreg)
{
    code *c;
    code cs;
    regm_t keepmsk = toreg ? RMload : RMstore;
    int sz = type_size(s->Stype);

    elem* e = el_var(s); // so we can trick getlvalue() into working for us

    if (mask[s->Sreglsw] & XMMREGS)
    {   // Convert to save/restore of XMM register
        if (toreg)
            cs.Iop = xmmload(s->Stype->Tty);        // MOVSS/D xreg,mem
        else
            cs.Iop = xmmstore(s->Stype->Tty);       // MOVSS/D mem,xreg
        c = getlvalue(&cs,e,keepmsk);
        cs.orReg(s->Sreglsw - XMM0);
        c = gen(c,&cs);
    }
    else
    {
        cs.Iop = toreg ? 0x8B : 0x89; // MOV reg,mem[ESP] : MOV mem[ESP],reg
        cs.Iop ^= (sz == 1);          // byte-sized variant of the MOV
        c = getlvalue(&cs,e,keepmsk);
        cs.orReg(s->Sreglsw);
        if (I64 && sz == 1 && s->Sreglsw >= 4)
            cs.Irex |= REX;
        c = gen(c,&cs);
        if (sz > REGSIZE)
        {   // Second instruction moves the most significant half
            cs.setReg(s->Sregmsw);
            getlvalue_msw(&cs);
            c = gen(c,&cs);
        }
    }

    el_free(e);

    return c;
}
|
||
|
||
/****************************
|
||
* Generate code for, and output a thunk.
|
||
* Input:
|
||
* thisty Type of this pointer
|
||
* p ESP parameter offset to this pointer
|
||
* d offset to add to 'this' pointer
|
||
* d2 offset from 'this' to vptr
|
||
* i offset into vtbl[]
|
||
*/
|
||
|
||
void cod3_thunk(symbol *sthunk,symbol *sfunc,unsigned p,tym_t thisty,
|
||
targ_size_t d,int i,targ_size_t d2)
|
||
{ code *c,*c1;
|
||
targ_size_t thunkoffset;
|
||
tym_t thunkty;
|
||
|
||
cod3_align();
|
||
|
||
/* Skip over return address */
|
||
thunkty = tybasic(sthunk->ty());
|
||
#if TARGET_SEGMENTED
|
||
if (tyfarfunc(thunkty))
|
||
p += I32 ? 8 : tysize[TYfptr]; /* far function */
|
||
else
|
||
#endif
|
||
p += tysize[TYnptr];
|
||
|
||
if (!I16)
|
||
{
|
||
/*
|
||
Generate:
|
||
ADD p[ESP],d
|
||
For direct call:
|
||
JMP sfunc
|
||
For virtual call:
|
||
MOV EAX, p[ESP] EAX = this
|
||
MOV EAX, d2[EAX] EAX = this->vptr
|
||
JMP i[EAX] jump to virtual function
|
||
*/
|
||
unsigned reg = 0;
|
||
if ((targ_ptrdiff_t)d < 0)
|
||
{
|
||
d = -d;
|
||
reg = 5; // switch from ADD to SUB
|
||
}
|
||
if (thunkty == TYmfunc)
|
||
{ // ADD ECX,d
|
||
c = CNIL;
|
||
if (d)
|
||
c = genc2(c,0x81,modregrm(3,reg,CX),d);
|
||
}
|
||
else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
|
||
{ // ADD EAX,d
|
||
c = CNIL;
|
||
if (d)
|
||
c = genc2(c,0x81,modregrm(3,reg,I64 ? DI : AX),d);
|
||
}
|
||
else
|
||
{
|
||
c = genc(CNIL,0x81,modregrm(2,reg,4),
|
||
FLconst,p, // to this
|
||
FLconst,d); // ADD p[ESP],d
|
||
c->Isib = modregrm(0,4,SP);
|
||
}
|
||
if (I64 && c)
|
||
c->Irex |= REX_W;
|
||
}
|
||
else
|
||
{
|
||
/*
|
||
Generate:
|
||
MOV BX,SP
|
||
ADD [SS:] p[BX],d
|
||
For direct call:
|
||
JMP sfunc
|
||
For virtual call:
|
||
MOV BX, p[BX] BX = this
|
||
MOV BX, d2[BX] BX = this->vptr
|
||
JMP i[BX] jump to virtual function
|
||
*/
|
||
|
||
|
||
c = genregs(CNIL,0x89,SP,BX); /* MOV BX,SP */
|
||
c1 = genc(CNIL,0x81,modregrm(2,0,7),
|
||
FLconst,p, /* to this */
|
||
FLconst,d); /* ADD p[BX],d */
|
||
if (config.wflags & WFssneds ||
|
||
// If DS needs reloading from SS,
|
||
// then assume SS != DS on thunk entry
|
||
(config.wflags & WFss && LARGEDATA))
|
||
c1->Iflags |= CFss; /* SS: */
|
||
c = cat(c,c1);
|
||
}
|
||
|
||
if ((i & 0xFFFF) != 0xFFFF) /* if virtual call */
|
||
{ code *c2,*c3;
|
||
|
||
#define FARTHIS (tysize(thisty) > REGSIZE)
|
||
#define FARVPTR FARTHIS
|
||
|
||
#if TARGET_SEGMENTED
|
||
assert(thisty != TYvptr); /* can't handle this case */
|
||
#endif
|
||
|
||
if (!I16)
|
||
{
|
||
assert(!FARTHIS && !LARGECODE);
|
||
if (thunkty == TYmfunc) // if 'this' is in ECX
|
||
{ c1 = CNIL;
|
||
|
||
// MOV EAX,d2[ECX]
|
||
c2 = genc1(CNIL,0x8B,modregrm(2,AX,CX),FLconst,d2);
|
||
}
|
||
else if (thunkty == TYjfunc) // if 'this' is in EAX
|
||
{ c1 = CNIL;
|
||
|
||
// MOV EAX,d2[EAX]
|
||
c2 = genc1(CNIL,0x8B,modregrm(2,AX,AX),FLconst,d2);
|
||
}
|
||
else
|
||
{
|
||
// MOV EAX,p[ESP]
|
||
c1 = genc1(CNIL,0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,(targ_uns) p);
|
||
if (I64)
|
||
c1->Irex |= REX_W;
|
||
|
||
// MOV EAX,d2[EAX]
|
||
c2 = genc1(CNIL,0x8B,modregrm(2,AX,AX),FLconst,d2);
|
||
}
|
||
if (I64)
|
||
code_orrex(c2, REX_W);
|
||
/* JMP i[EAX] */
|
||
c3 = genc1(CNIL,0xFF,modregrm(2,4,0),FLconst,(targ_uns) i);
|
||
}
|
||
else
|
||
{
|
||
/* MOV/LES BX,[SS:] p[BX] */
|
||
c1 = genc1(CNIL,(FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,(targ_uns) p);
|
||
if (config.wflags & WFssneds ||
|
||
// If DS needs reloading from SS,
|
||
// then assume SS != DS on thunk entry
|
||
(config.wflags & WFss && LARGEDATA))
|
||
c1->Iflags |= CFss; /* SS: */
|
||
|
||
/* MOV/LES BX,[ES:]d2[BX] */
|
||
c2 = genc1(CNIL,(FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
|
||
if (FARTHIS)
|
||
c2->Iflags |= CFes; /* ES: */
|
||
|
||
/* JMP i[BX] */
|
||
c3 = genc1(CNIL,0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,(targ_uns) i);
|
||
if (FARVPTR)
|
||
c3->Iflags |= CFes; /* ES: */
|
||
}
|
||
c = cat4(c,c1,c2,c3);
|
||
}
|
||
else
|
||
{
|
||
c1 = gencs(CNIL,(LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); /* JMP sfunc */
|
||
c1->Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
|
||
c = cat(c,c1);
|
||
}
|
||
|
||
thunkoffset = Coffset;
|
||
pinholeopt(c,NULL);
|
||
codout(c);
|
||
code_free(c);
|
||
|
||
sthunk->Soffset = thunkoffset;
|
||
sthunk->Ssize = Coffset - thunkoffset; /* size of thunk */
|
||
sthunk->Sseg = cseg;
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
objpubdef(cseg,sthunk,sthunk->Soffset);
|
||
#endif
|
||
searchfixlist(sthunk); /* resolve forward refs */
|
||
}
|
||
|
||
/*****************************
|
||
* Assume symbol s is extern.
|
||
*/
|
||
|
||
void makeitextern(symbol *s)
|
||
{
|
||
if (s->Sxtrnnum == 0)
|
||
{ s->Sclass = SCextern; /* external */
|
||
/*printf("makeitextern(x%x)\n",s);*/
|
||
objextern(s);
|
||
}
|
||
}
|
||
|
||
|
||
/*******************************
|
||
* Replace JMPs in Bgotocode with JMP SHORTs whereever possible.
|
||
* This routine depends on FLcode jumps to only be forward
|
||
* referenced.
|
||
* BFLjmpoptdone is set to TRUE if nothing more can be done
|
||
* with this block.
|
||
* Input:
|
||
* flag !=0 means don't have correct Boffsets yet
|
||
* Returns:
|
||
* number of bytes saved
|
||
*/
|
||
|
||
int branch(block *bl,int flag)
|
||
{ int bytesaved;
|
||
code *c,*cn,*ct;
|
||
targ_size_t offset,disp;
|
||
targ_size_t csize;
|
||
|
||
if (!flag)
|
||
bl->Bflags |= BFLjmpoptdone; // assume this will be all
|
||
c = bl->Bcode;
|
||
if (!c)
|
||
return 0;
|
||
bytesaved = 0;
|
||
offset = bl->Boffset; /* offset of start of block */
|
||
while (1)
|
||
{ unsigned char op;
|
||
|
||
csize = calccodsize(c);
|
||
cn = code_next(c);
|
||
op = c->Iop;
|
||
if ((op & ~0x0F) == 0x70 && c->Iflags & CFjmp16 ||
|
||
op == JMP)
|
||
{
|
||
L1:
|
||
switch (c->IFL2)
|
||
{
|
||
case FLblock:
|
||
if (flag) // no offsets yet, don't optimize
|
||
goto L3;
|
||
disp = c->IEV2.Vblock->Boffset - offset - csize;
|
||
|
||
/* If this is a forward branch, and there is an aligned
|
||
* block intervening, it is possible that shrinking
|
||
* the jump instruction will cause it to be out of
|
||
* range of the target. This happens if the alignment
|
||
* prevents the target block from moving correspondingly
|
||
* closer.
|
||
*/
|
||
if (disp >= 0x7F-4 && c->IEV2.Vblock->Boffset > offset)
|
||
{ /* Look for intervening alignment
|
||
*/
|
||
for (block *b = bl->Bnext; b; b = b->Bnext)
|
||
{
|
||
if (b->Balign)
|
||
{
|
||
bl->Bflags &= ~BFLjmpoptdone; // some JMPs left
|
||
goto L3;
|
||
}
|
||
if (b == c->IEV2.Vblock)
|
||
break;
|
||
}
|
||
}
|
||
|
||
break;
|
||
|
||
case FLcode:
|
||
{ code *cr;
|
||
|
||
disp = 0;
|
||
|
||
ct = c->IEV2.Vcode; /* target of branch */
|
||
assert(ct->Iflags & (CFtarg | CFtarg2));
|
||
for (cr = cn; cr; cr = code_next(cr))
|
||
{
|
||
if (cr == ct)
|
||
break;
|
||
disp += calccodsize(cr);
|
||
}
|
||
|
||
if (!cr)
|
||
{ // Didn't find it in forward search. Try backwards jump
|
||
int s = 0;
|
||
disp = 0;
|
||
for (cr = bl->Bcode; cr != cn; cr = code_next(cr))
|
||
{
|
||
assert(cr != NULL); // must have found it
|
||
if (cr == ct)
|
||
s = 1;
|
||
if (s)
|
||
disp += calccodsize(cr);
|
||
}
|
||
}
|
||
|
||
if (config.flags4 & CFG4optimized && !flag)
|
||
{
|
||
/* Propagate branch forward past junk */
|
||
while (1)
|
||
{ if (ct->Iop == NOP ||
|
||
ct->Iop == (ESCAPE | ESClinnum))
|
||
{ ct = code_next(ct);
|
||
if (!ct)
|
||
goto L2;
|
||
}
|
||
else
|
||
{ c->IEV2.Vcode = ct;
|
||
ct->Iflags |= CFtarg;
|
||
break;
|
||
}
|
||
}
|
||
|
||
/* And eliminate jmps to jmps */
|
||
if ((op == ct->Iop || ct->Iop == JMP) &&
|
||
(op == JMP || c->Iflags & CFjmp16))
|
||
{ c->IFL2 = ct->IFL2;
|
||
c->IEV2.Vcode = ct->IEV2.Vcode;
|
||
/*printf("eliminating branch\n");*/
|
||
goto L1;
|
||
}
|
||
L2: ;
|
||
}
|
||
}
|
||
break;
|
||
|
||
default:
|
||
goto L3;
|
||
}
|
||
|
||
if (disp == 0) // bra to next instruction
|
||
{ bytesaved += csize;
|
||
c->Iop = NOP; // del branch instruction
|
||
c->IEV2.Vcode = NULL;
|
||
c = cn;
|
||
if (!c)
|
||
break;
|
||
continue;
|
||
}
|
||
else if ((targ_size_t)(targ_schar)(disp - 2) == (disp - 2) &&
|
||
(targ_size_t)(targ_schar)disp == disp)
|
||
{
|
||
if (op == JMP)
|
||
{ c->Iop = JMPS; // JMP SHORT
|
||
bytesaved += I16 ? 1 : 3;
|
||
}
|
||
else // else Jcond
|
||
{ c->Iflags &= ~CFjmp16; // a branch is ok
|
||
bytesaved += I16 ? 3 : 4;
|
||
|
||
// Replace a cond jump around a call to a function that
|
||
// never returns with a cond jump to that function.
|
||
if (config.flags4 & CFG4optimized &&
|
||
config.target_cpu >= TARGET_80386 &&
|
||
disp == (I16 ? 3 : 5) &&
|
||
cn &&
|
||
cn->Iop == CALL &&
|
||
cn->IFL2 == FLfunc &&
|
||
cn->IEVsym2->Sflags & SFLexit &&
|
||
!(cn->Iflags & (CFtarg | CFtarg2))
|
||
)
|
||
{
|
||
cn->Iop = 0x0F00 | ((c->Iop & 0x0F) ^ 0x81);
|
||
c->Iop = NOP;
|
||
c->IEV2.Vcode = NULL;
|
||
bytesaved++;
|
||
|
||
// If nobody else points to ct, we can remove the CFtarg
|
||
if (flag && ct)
|
||
{ code *cx;
|
||
|
||
for (cx = bl->Bcode; 1; cx = code_next(cx))
|
||
{
|
||
if (!cx)
|
||
{ ct->Iflags &= ~CFtarg;
|
||
break;
|
||
}
|
||
if (cx->IEV2.Vcode == ct)
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
csize = calccodsize(c);
|
||
}
|
||
else
|
||
bl->Bflags &= ~BFLjmpoptdone; // some JMPs left
|
||
}
|
||
L3:
|
||
if (cn)
|
||
{ offset += csize;
|
||
c = cn;
|
||
}
|
||
else
|
||
break;
|
||
}
|
||
//printf("bytesaved = x%x\n",bytesaved);
|
||
return bytesaved;
|
||
}
|
||
|
||
/************************************************
 * Adjust all Soffset's of stack variables so they
 * are all relative to the frame pointer.
 * Walks every symbol in globsym:
 *      SCparameter, SCregpar                   Soffset += Poff
 *      SCauto, SCfastpar, SCregister           Soffset += Aoff + BPoff
 *      anything else (including SCbprel)       left unchanged
 */

#if MARS

void cod3_adjSymOffsets()
{   SYMIDX si;

    //printf("cod3_adjSymOffsets()\n");
    for (si = 0; si < globsym.top; si++)
    {   //printf("globsym.tab[%d] = %p\n",si,globsym.tab[si]);
        symbol *s = globsym.tab[si];

        switch (s->Sclass)
        {
            case SCparameter:
            case SCregpar:
                //printf("s = '%s', Soffset = x%x, Poff = x%x, EBPtoESP = x%x\n", s->Sident, s->Soffset, Poff, EBPtoESP);
                s->Soffset += Poff;
                break;
            case SCauto:
            case SCfastpar:
            case SCregister:
                //printf("s = '%s', Soffset = x%x, Aoff = x%x, BPoff = x%x EBPtoESP = x%x\n", s->Sident, s->Soffset, Aoff, BPoff, EBPtoESP);
                s->Soffset += Aoff + BPoff;
                break;
            case SCbprel:
                break;
            default:
                continue;
        }
    }
}

#endif
|
||
|
||
/*******************************
|
||
* Take symbol info in union ev and replace it with a real address
|
||
* in Vpointer.
|
||
*/
|
||
|
||
void assignaddr(block *bl)
|
||
{
|
||
int EBPtoESPsave = EBPtoESP;
|
||
int hasframesave = hasframe;
|
||
|
||
if (bl->Bflags & BFLoutsideprolog)
|
||
{ EBPtoESP = -REGSIZE;
|
||
hasframe = 0;
|
||
}
|
||
assignaddrc(bl->Bcode);
|
||
hasframe = hasframesave;
|
||
EBPtoESP = EBPtoESPsave;
|
||
}
|
||
|
||
/*******************************
 * Walk the code list starting at c and resolve operand references.
 * For each instruction the first operand (IFL1) and then the second
 * operand (IFL2) is examined. Frame-relative kinds (FLauto, FLpara,
 * FLtmp, FLfltreg, ...) get the matching section offset added to the
 * operand's pointer value and are then marked FLconst.
 * The ESCadjesp and ESCframeptr pseudo-ops are consumed here:
 * ESCadjesp updates EBPtoESP and becomes a NOP, ESCframeptr is turned
 * into a MOV or LEA that loads the frame pointer.
 * Instructions that only reference a never-loaded symbol or CSE are
 * replaced with NOP.
 */
void assignaddrc(code *c)
|
||
{
|
||
int sn;
|
||
symbol *s;
|
||
unsigned char ins,rm;
|
||
targ_size_t soff;
|
||
targ_size_t base;
|
||
|
||
// Remember EBPtoESP on entry; FLstack operands are adjusted by how far
// EBPtoESP has moved since then.
base = EBPtoESP;
|
||
for (; c; c = code_next(c))
|
||
{
|
||
#ifdef DEBUG
|
||
if (0)
|
||
{ printf("assignaddrc()\n");
|
||
c->print();
|
||
}
|
||
// Sanity check: the next instruction must not link straight back to c
if (code_next(c) && code_next(code_next(c)) == c)
|
||
assert(0);
|
||
#endif
|
||
// Look up the instruction's attribute byte (ins) from the table that
// matches its opcode form
if (c->Iflags & CFvex)
|
||
ins = vex_inssize(c);
|
||
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
|
||
ins = inssize2[(c->Iop >> 8) & 0xFF];
|
||
else if ((c->Iop & 0xFF00) == 0x0F00)
|
||
ins = inssize2[c->Iop & 0xFF];
|
||
else if ((c->Iop & 0xFF) == ESCAPE)
|
||
{
|
||
if (c->Iop == (ESCAPE | ESCadjesp))
|
||
{
|
||
//printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV2.Vint);
|
||
// Accumulate the adjustment and drop the pseudo-op
EBPtoESP += c->IEV1.Vint;
|
||
c->Iop = NOP;
|
||
}
|
||
if (c->Iop == (ESCAPE | ESCframeptr))
|
||
{ // Convert to load of frame pointer
|
||
// c->Irm is the register to use
|
||
if (hasframe)
|
||
{ // MOV reg,EBP
|
||
c->Iop = 0x89;
|
||
if (c->Irm & 8)
|
||
c->Irex |= REX_B;
|
||
c->Irm = modregrm(3,BP,c->Irm & 7);
|
||
}
|
||
else
|
||
{ // LEA reg,EBPtoESP[ESP]
|
||
c->Iop = 0x8D;
|
||
if (c->Irm & 8)
|
||
c->Irex |= REX_R;
|
||
c->Irm = modregrm(2,c->Irm & 7,4);
|
||
c->Isib = modregrm(0,4,SP);
|
||
c->Iflags = CFoff;
|
||
c->IFL1 = FLconst;
|
||
c->IEV1.Vuns = EBPtoESP;
|
||
}
|
||
}
|
||
// Note: REX_W is set for every ESCAPE-class instruction in 64 bit
// mode, not only for the converted frame pointer load
if (I64)
|
||
c->Irex |= REX_W;
|
||
continue;
|
||
}
|
||
else
|
||
ins = inssize[c->Iop & 0xFF];
|
||
if (!(ins & M) ||
|
||
((rm = c->Irm) & 0xC0) == 0xC0)
|
||
goto do2; /* if no first operand */
|
||
if (is32bitaddr(I32,c->Iflags))
|
||
{
|
||
|
||
if (
|
||
((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5))
|
||
)
|
||
goto do2; /* if no first operand */
|
||
}
|
||
else
|
||
{
|
||
if (
|
||
((rm & 0xC0) == 0 && !((rm & 7) == 6))
|
||
)
|
||
goto do2; /* if no first operand */
|
||
}
|
||
// Resolve the first operand according to its kind. Cases that 'break'
// fall through to the FLconst assignment below; cases that 'goto do2'
// leave IFL1 as they set it.
s = c->IEVsym1;
|
||
switch (c->IFL1)
|
||
{
|
||
#if OMFOBJ
|
||
case FLdata:
|
||
if (s->Sclass == SCcomdat)
|
||
{ c->IFL1 = FLextern;
|
||
goto do2;
|
||
}
|
||
#if MARS
|
||
c->IEVseg1 = s->Sseg;
|
||
#else
|
||
c->IEVseg1 = DATA;
|
||
#endif
|
||
c->IEVpointer1 += s->Soffset;
|
||
c->IFL1 = FLdatseg;
|
||
goto do2;
|
||
case FLudata:
|
||
#if MARS
|
||
c->IEVseg1 = s->Sseg;
|
||
#else
|
||
c->IEVseg1 = UDATA;
|
||
#endif
|
||
c->IEVpointer1 += s->Soffset;
|
||
c->IFL1 = FLdatseg;
|
||
goto do2;
|
||
#else // don't lose symbol information
|
||
case FLdata:
|
||
case FLudata:
|
||
case FLtlsdata:
|
||
c->IFL1 = FLextern;
|
||
goto do2;
|
||
#endif
|
||
case FLdatseg:
|
||
c->IEVseg1 = DATA;
|
||
goto do2;
|
||
|
||
#if TARGET_SEGMENTED
|
||
case FLfardata:
|
||
case FLcsdata:
|
||
#endif
|
||
case FLpseudo:
|
||
goto do2;
|
||
|
||
case FLstack:
|
||
//printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
|
||
//s->Soffset,EBPtoESP,base,c->IEVpointer1);
|
||
c->IEVpointer1 += s->Soffset + EBPtoESP - base - EEoffset;
|
||
break;
|
||
|
||
case FLreg:
|
||
case FLauto:
|
||
soff = Aoff;
|
||
// L1: shared handling for symbol operands; soff is the section offset
// chosen by the case that jumped here (FLauto/FLreg, FLpara, FLtmp)
L1:
|
||
if (s->Sflags & SFLunambig && !(s->Sflags & SFLread) && // if never loaded
|
||
!anyiasm &&
|
||
// if not optimized, leave it in for debuggability
|
||
(config.flags4 & CFG4optimized || !config.fulltypes))
|
||
{ c->Iop = NOP; // remove references to it
|
||
continue;
|
||
}
|
||
// Symbol lives in a register: rewrite the EA to address that register
if (s->Sfl == FLreg && c->IEVpointer1 < 2)
|
||
{ int reg = s->Sreglsw;
|
||
|
||
assert(!(s->Sregm & ~mask[reg]));
|
||
if (c->IEVpointer1 == 1)
|
||
{ assert(reg < 4); /* must be a BYTEREGS */
|
||
reg |= 4; /* convert to high byte reg */
|
||
}
|
||
if (reg & 8)
|
||
{ assert(I64);
|
||
c->Irex |= REX_B;
|
||
reg &= 7;
|
||
}
|
||
c->Irm = (c->Irm & modregrm(0,7,0))
|
||
| modregrm(3,0,reg);
|
||
assert(c->Iop != LES && c->Iop != LEA);
|
||
goto do2;
|
||
}
|
||
else
|
||
{ c->IEVpointer1 += s->Soffset + soff + BPoff;
|
||
if (s->Sflags & SFLunambig)
|
||
c->Iflags |= CFunambig;
|
||
// L2: shared tail for all frame-relative operands
L2:
|
||
if (!hasframe)
|
||
{ /* Convert to ESP relative address instead of EBP */
|
||
unsigned char rm;
|
||
|
||
assert(!I16);
|
||
c->IEVpointer1 += EBPtoESP;
|
||
rm = c->Irm;
|
||
if ((rm & 7) == 4) // if SIB byte
|
||
{
|
||
assert((c->Isib & 7) == BP);
|
||
assert((rm & 0xC0) != 0);
|
||
c->Isib = (c->Isib & ~7) | modregrm(0,0,SP);
|
||
}
|
||
else
|
||
{
|
||
assert((rm & 7) == 5);
|
||
c->Irm = (rm & modregrm(0,7,0))
|
||
| modregrm(2,0,4);
|
||
c->Isib = modregrm(0,4,SP);
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
case FLpara:
|
||
soff = Poff - BPoff; // cancel out add of BPoff
|
||
goto L1;
|
||
case FLtmp:
|
||
soff = Toff;
|
||
goto L1;
|
||
case FLfltreg:
|
||
c->IEVpointer1 += Foff + BPoff;
|
||
c->Iflags |= CFunambig;
|
||
goto L2;
|
||
case FLallocatmp:
|
||
c->IEVpointer1 += AAoff + BPoff;
|
||
goto L2;
|
||
case FLbprel:
|
||
c->IEVpointer1 += s->Soffset;
|
||
break;
|
||
case FLcs:
|
||
sn = c->IEV1.Vuns;
|
||
if (!CSE_loaded(sn)) // if never loaded
|
||
{ c->Iop = NOP;
|
||
continue;
|
||
}
|
||
c->IEVpointer1 = sn * REGSIZE + CSoff + BPoff;
|
||
c->Iflags |= CFunambig;
|
||
goto L2;
|
||
case FLregsave:
|
||
sn = c->IEV1.Vuns;
|
||
c->IEVpointer1 = sn + regsave.off + BPoff;
|
||
c->Iflags |= CFunambig;
|
||
goto L2;
|
||
case FLndp:
|
||
#if MARS
|
||
assert(c->IEV1.Vuns < NDP::savetop);
|
||
#endif
|
||
c->IEVpointer1 = c->IEV1.Vuns * NDPSAVESIZE + NDPoff + BPoff;
|
||
c->Iflags |= CFunambig;
|
||
goto L2;
|
||
case FLoffset:
|
||
break;
|
||
case FLlocalsize:
|
||
c->IEVpointer1 += localsize;
|
||
break;
|
||
case FLconst:
|
||
default:
|
||
goto do2;
|
||
}
|
||
// First operand has been reduced to a plain constant displacement
c->IFL1 = FLconst;
|
||
do2:
|
||
/* Ignore TEST (F6 and F7) opcodes */
|
||
if (!(ins & T)) goto done; /* if no second operand */
|
||
s = c->IEVsym2;
|
||
switch (c->IFL2)
|
||
{
|
||
#if ELFOBJ || MACHOBJ
|
||
case FLdata:
|
||
case FLudata:
|
||
case FLtlsdata:
|
||
c->IFL2 = FLextern;
|
||
// Re-dispatches on FLextern, which has no case below and so takes
// the default branch to 'done'
goto do2;
|
||
#else
|
||
case FLdata:
|
||
if (s->Sclass == SCcomdat)
|
||
{ c->IFL2 = FLextern;
|
||
goto do2;
|
||
}
|
||
#if MARS
|
||
c->IEVseg2 = s->Sseg;
|
||
#else
|
||
c->IEVseg2 = DATA;
|
||
#endif
|
||
c->IEVpointer2 += s->Soffset;
|
||
c->IFL2 = FLdatseg;
|
||
goto done;
|
||
case FLudata:
|
||
#if MARS
|
||
c->IEVseg2 = s->Sseg;
|
||
#else
|
||
c->IEVseg2 = UDATA;
|
||
#endif
|
||
c->IEVpointer2 += s->Soffset;
|
||
c->IFL2 = FLdatseg;
|
||
goto done;
|
||
#endif
|
||
case FLdatseg:
|
||
c->IEVseg2 = DATA;
|
||
goto done;
|
||
#if TARGET_SEGMENTED
|
||
case FLcsdata:
|
||
case FLfardata:
|
||
goto done;
|
||
#endif
|
||
case FLreg:
|
||
case FLpseudo:
|
||
assert(0);
|
||
/* NOTREACHED */
|
||
case FLauto:
|
||
c->IEVpointer2 += s->Soffset + Aoff + BPoff;
|
||
break;
|
||
case FLpara:
|
||
c->IEVpointer2 += s->Soffset + Poff;
|
||
break;
|
||
case FLtmp:
|
||
c->IEVpointer2 += s->Soffset + Toff + BPoff;
|
||
break;
|
||
case FLfltreg:
|
||
c->IEVpointer2 += Foff + BPoff;
|
||
break;
|
||
case FLallocatmp:
|
||
c->IEVpointer2 += AAoff + BPoff;
|
||
break;
|
||
case FLbprel:
|
||
c->IEVpointer2 += s->Soffset;
|
||
break;
|
||
|
||
// NOTE(review): unlike the first-operand FLstack case, EEoffset is not
// subtracted here - confirm this asymmetry is intended
case FLstack:
|
||
c->IEVpointer2 += s->Soffset + EBPtoESP - base;
|
||
break;
|
||
|
||
case FLcs:
|
||
case FLndp:
|
||
case FLregsave:
|
||
assert(0);
|
||
/* NOTREACHED */
|
||
|
||
case FLconst:
|
||
break;
|
||
|
||
case FLlocalsize:
|
||
c->IEVpointer2 += localsize;
|
||
break;
|
||
|
||
default:
|
||
goto done;
|
||
}
|
||
// Second operand has been reduced to a plain constant
c->IFL2 = FLconst;
|
||
done:
|
||
;
|
||
}
|
||
}
|
||
|
||
/*******************************
|
||
* Return offset from BP of symbol s.
|
||
*/
|
||
|
||
targ_size_t cod3_bpoffset(symbol *s)
{
    symbol_debug(s);

    /* Start from the symbol's own offset, then add the base of the
     * section (parameters, autos or temporaries) that holds it.
     */
    targ_size_t result = s->Soffset;

    if (s->Sfl == FLpara)
    {
        result += Poff;
    }
    else if (s->Sfl == FLauto)
    {
        result += Aoff + BPoff;
    }
    else if (s->Sfl == FLtmp)
    {
        result += Toff + BPoff;
    }
    else
    {
        /* Any other storage kind is not supported here. */
#ifdef DEBUG
        WRFL((enum FL)s->Sfl);
        symbol_print(s);
#endif
        assert(0);
    }

    /* A BP-relative offset is only meaningful when a frame exists. */
    assert(hasframe);
    return result;
}
|
||
|
||
|
||
/*******************************
|
||
* Find shorter versions of the same instructions.
|
||
* Does these optimizations:
|
||
* replaces jmps to the next instruction with NOPs
|
||
* sign extension of modregrm displacement
|
||
* sign extension of immediate data (can't do it for OR, AND, XOR
|
||
* as the opcodes are not defined)
|
||
* short versions for AX EA
|
||
* short versions for reg EA
|
||
* Input:
|
||
* b -> block for code (or NULL)
|
||
*/
|
||
|
||
// Rewrites each instruction of the list c in place. Whenever a rewrite
// changes the opcode or operands, control returns to label L1 so the new
// form is examined again. b may be NULL (as in the unit test), in which
// case there is no following block to compare jump targets against.
void pinholeopt(code *c,block *b)
|
||
{ targ_size_t a;
|
||
unsigned op,mod;
|
||
unsigned char ins;
|
||
int usespace;
|
||
int useopsize;
|
||
int space;
|
||
block *bn;
|
||
|
||
#ifdef DEBUG
|
||
// Run the self test once, on the first call
static int tested; if (!tested) { tested++; pinholeopt_unittest(); }
|
||
#endif
|
||
|
||
#if 0
|
||
code *cstart = c;
|
||
if (debugc)
|
||
{
|
||
printf("+pinholeopt(%p)\n",c);
|
||
}
|
||
#endif
|
||
|
||
// bn        = block following b, used to detect jumps to the next block
// usespace  = optimizing for space (never for inline asm blocks)
// useopsize = operand size prefix rewrites are allowed (always in 16 bit
//             mode, otherwise only when optimizing for space)
if (b)
|
||
{ bn = b->Bnext;
|
||
usespace = (config.flags4 & CFG4space && b->BC != BCasm);
|
||
useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm));
|
||
}
|
||
else
|
||
{ bn = NULL;
|
||
usespace = (config.flags4 & CFG4space);
|
||
useopsize = (I16 || config.flags4 & CFG4space);
|
||
}
|
||
for (; c; c = code_next(c))
|
||
{
|
||
// L1: (re)examine instruction c; rewrites jump back here
L1:
|
||
op = c->Iop;
|
||
if (c->Iflags & CFvex)
|
||
ins = vex_inssize(c);
|
||
else if ((op & 0xFFFD00) == 0x0F3800)
|
||
ins = inssize2[(op >> 8) & 0xFF];
|
||
else if ((op & 0xFF00) == 0x0F00)
|
||
ins = inssize2[op & 0xFF];
|
||
else
|
||
ins = inssize[op & 0xFF];
|
||
if (ins & M) // if modregrm byte
|
||
{ int shortop = (c->Iflags & CFopsize) ? !I16 : I16;
|
||
int local_BPRM = BPRM;
|
||
|
||
if (c->Iflags & CFaddrsize)
|
||
local_BPRM ^= 5 ^ 6; // toggle between 5 and 6
|
||
|
||
unsigned rm = c->Irm;
|
||
unsigned reg = rm & modregrm(0,7,0); // isolate reg field
|
||
unsigned ereg = rm & 7;
|
||
//printf("c = %p, op = %02x rm = %02x\n", c, op, rm);
|
||
|
||
/* If immediate second operand */
|
||
if ((ins & T ||
|
||
((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
|
||
) &&
|
||
c->IFL2 == FLconst)
|
||
{
|
||
int flags = c->Iflags & CFpsw; /* if want result in flags */
|
||
// u = the immediate, sign extended from the width actually encoded
targ_long u = c->IEV2.Vuns;
|
||
if (ins & E)
|
||
u = (signed char) u;
|
||
else if (shortop)
|
||
u = (short) u;
|
||
|
||
// Replace CMP reg,0 with TEST reg,reg
|
||
if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm
|
||
rm >= modregrm(3,7,AX) &&
|
||
u == 0)
|
||
{ c->Iop = (op & 1) | 0x84;
|
||
c->Irm = modregrm(3,ereg,ereg);
|
||
if (c->Irex & REX_B)
|
||
c->Irex |= REX_R;
|
||
goto L1;
|
||
}
|
||
|
||
/* Optimize ANDs with an immediate constant */
|
||
if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
|
||
{
|
||
if (rm >= modregrm(3,4,AX)) // AND reg,imm
|
||
{
|
||
if (u == 0)
|
||
{ /* Replace with XOR reg,reg */
|
||
c->Iop = 0x30 | (op & 1);
|
||
c->Irm = modregrm(3,ereg,ereg);
|
||
if (c->Irex & REX_B)
|
||
c->Irex |= REX_R;
|
||
goto L1;
|
||
}
|
||
// AND with all ones changes nothing; drop it unless flags are wanted
if (u == 0xFFFFFFFF && !flags)
|
||
{ c->Iop = NOP;
|
||
goto L1;
|
||
}
|
||
}
|
||
if (op == 0x81 && !flags)
|
||
{ // If we can do the operation in one byte
|
||
|
||
// If EA is not SI or DI
|
||
if ((rm < modregrm(3,4,SP) || I64) &&
|
||
(config.flags4 & CFG4space ||
|
||
config.target_cpu < TARGET_PentiumPro)
|
||
)
|
||
{
|
||
if ((u & 0xFFFFFF00) == 0xFFFFFF00)
|
||
goto L2;
|
||
else if (rm < modregrm(3,0,0) || (!c->Irex && ereg < 4))
|
||
{ if (!shortop)
|
||
{ if ((u & 0xFFFF00FF) == 0xFFFF00FF)
|
||
goto L3;
|
||
}
|
||
else
|
||
{
|
||
if ((u & 0xFF) == 0xFF)
|
||
goto L3;
|
||
}
|
||
}
|
||
}
|
||
if (!shortop && useopsize)
|
||
{
|
||
if ((u & 0xFFFF0000) == 0xFFFF0000)
|
||
{ c->Iflags ^= CFopsize;
|
||
goto L1;
|
||
}
|
||
if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
|
||
{ c->IEVoffset1 += 2; /* address MSW */
|
||
c->IEV2.Vuns >>= 16;
|
||
c->Iflags ^= CFopsize;
|
||
goto L1;
|
||
}
|
||
if (rm >= modregrm(3,4,AX))
|
||
{
|
||
if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64))
|
||
{ c->Iop = 0x0FB6; // MOVZX
|
||
c->Irm = modregrm(3,ereg,ereg);
|
||
if (c->Irex & REX_B)
|
||
c->Irex |= REX_R;
|
||
goto L1;
|
||
}
|
||
if (u == 0xFFFF)
|
||
{ c->Iop = 0x0FB7; // MOVZX
|
||
c->Irm = modregrm(3,ereg,ereg);
|
||
if (c->Irex & REX_B)
|
||
c->Irex |= REX_R;
|
||
goto L1;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Look for ADD,OR,SUB,XOR with u that we can eliminate */
|
||
if (!flags &&
|
||
(op == 0x81 || op == 0x80) &&
|
||
(reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR
|
||
reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR
|
||
)
|
||
{
|
||
if (u == 0)
|
||
{
|
||
c->Iop = NOP;
|
||
goto L1;
|
||
}
|
||
if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */
|
||
{
|
||
c->Iop = 0xF6 | (op & 1); /* NOT */
|
||
// flip the reg field from 6 (XOR) to 2 (NOT)
c->Irm ^= modregrm(0,6^2,0);
|
||
goto L1;
|
||
}
|
||
if (!shortop &&
|
||
useopsize &&
|
||
op == 0x81 &&
|
||
(u & 0xFFFF0000) == 0 &&
|
||
(reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
|
||
{ c->Iflags ^= CFopsize;
|
||
goto L1;
|
||
}
|
||
}
|
||
|
||
/* Look for TEST or OR or XOR with an immediate constant */
|
||
/* that we can replace with a byte operation */
|
||
if (op == 0xF7 && reg == modregrm(0,0,0) ||
|
||
op == 0x81 && reg == modregrm(0,6,0) && !flags ||
|
||
op == 0x81 && reg == modregrm(0,1,0))
|
||
{
|
||
// See if we can replace a dword with a word
|
||
// (avoid for 32 bit instructions, because CFopsize
|
||
// is too slow)
|
||
if (!shortop && useopsize)
|
||
{ if ((u & 0xFFFF0000) == 0)
|
||
{ c->Iflags ^= CFopsize;
|
||
goto L1;
|
||
}
|
||
/* If memory (not register) addressing mode */
|
||
if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
|
||
{ c->IEVoffset1 += 2; /* address MSW */
|
||
c->IEV2.Vuns >>= 16;
|
||
c->Iflags ^= CFopsize;
|
||
goto L1;
|
||
}
|
||
}
|
||
|
||
// If EA is not SI or DI
|
||
if (rm < (modregrm(3,0,SP) | reg) &&
|
||
(usespace ||
|
||
config.target_cpu < TARGET_PentiumPro)
|
||
)
|
||
{
|
||
if ((u & 0xFFFFFF00) == 0)
|
||
{
|
||
// L2 is also entered from the AND handling above
L2: c->Iop--; /* to byte instruction */
|
||
c->Iflags &= ~CFopsize;
|
||
goto L1;
|
||
}
|
||
if (((u & 0xFFFF00FF) == 0 ||
|
||
(shortop && (u & 0xFF) == 0)) &&
|
||
(rm < modregrm(3,0,0) || (!c->Irex && ereg < 4)))
|
||
{
|
||
// L3 is also entered from the AND handling above: operate on the
// second byte of the operand only
L3:
|
||
c->IEV2.Vuns >>= 8;
|
||
if (rm >= (modregrm(3,0,AX) | reg))
|
||
c->Irm |= 4; /* AX->AH, BX->BH, etc. */
|
||
else
|
||
c->IEVoffset1 += 1;
|
||
goto L2;
|
||
}
|
||
}
|
||
#if 0
|
||
// BUG: which is right?
|
||
else if ((u & 0xFFFF0000) == 0)
|
||
#else
|
||
else if (0 && op == 0xF7 &&
|
||
rm >= modregrm(3,0,SP) &&
|
||
(u & 0xFFFF0000) == 0)
|
||
#endif
|
||
c->Iflags &= ~CFopsize;
|
||
}
|
||
|
||
// Try to replace TEST reg,-1 with TEST reg,reg
|
||
if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
|
||
{ if ((u & 0xFF) == 0xFF)
|
||
{
|
||
L4: c->Iop = 0x84; // TEST regL,regL
|
||
c->Irm = modregrm(3,ereg,ereg);
|
||
if (c->Irex & REX_B)
|
||
c->Irex |= REX_R;
|
||
c->Iflags &= ~CFopsize;
|
||
goto L1;
|
||
}
|
||
}
|
||
if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
|
||
{ if (u == 0xFF)
|
||
goto L4;
|
||
if ((u & 0xFFFF) == 0xFF00 && shortop && !c->Irex && ereg < 4)
|
||
{ ereg |= 4; /* to regH */
|
||
goto L4;
|
||
}
|
||
}
|
||
|
||
/* Look for sign extended immediate data */
|
||
if ((signed char) u == u)
|
||
{
|
||
if (op == 0x81)
|
||
{ if (reg != 0x08 && reg != 0x20 && reg != 0x30)
|
||
c->Iop = op = 0x83; /* 8 bit sgn ext */
|
||
}
|
||
else if (op == 0x69) /* IMUL rw,ew,dw */
|
||
c->Iop = op = 0x6B; /* IMUL rw,ew,db */
|
||
}
|
||
|
||
// Look for SHIFT EA,imm8 we can replace with short form
|
||
if (u == 1 && ((op & 0xFE) == 0xC0))
|
||
c->Iop |= 0xD0;
|
||
|
||
} /* if immediate second operand */
|
||
|
||
/* Look for AX short form */
|
||
if (ins & A)
|
||
{ if (rm == modregrm(0,AX,local_BPRM) &&
|
||
!(c->Irex & REX_R) && // and it's AX, not R8
|
||
(op & ~3) == 0x88 &&
|
||
!I64)
|
||
{ op = ((op & 3) + 0xA0) ^ 2;
|
||
/* 8A-> A0 */
|
||
/* 8B-> A1 */
|
||
/* 88-> A2 */
|
||
/* 89-> A3 */
|
||
c->Iop = op;
|
||
// the short form carries the address as its second operand
c->IFL2 = c->IFL1;
|
||
c->IEV2 = c->IEV1;
|
||
}
|
||
|
||
/* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */
|
||
else if (!I16 &&
|
||
(op == 0x89 || op == 0x8B) &&
|
||
(rm & 0xC0) == 0xC0 &&
|
||
(!b || b->BC != BCasm)
|
||
)
|
||
c->Iflags &= ~CFopsize;
|
||
|
||
// If rm is AX
|
||
else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c->Irex & (REX_R | REX_B)))
|
||
{ switch (op)
|
||
{ case 0x80: op = reg | 4; break;
|
||
case 0x81: op = reg | 5; break;
|
||
case 0x87: op = 0x90 + (reg>>3); break; // XCHG
|
||
case 0xF6:
|
||
if (reg == 0)
|
||
op = 0xA8; /* TEST AL,immed8 */
|
||
break;
|
||
case 0xF7:
|
||
if (reg == 0)
|
||
op = 0xA9; /* TEST AX,immed16 */
|
||
break;
|
||
}
|
||
c->Iop = op;
|
||
}
|
||
}
|
||
|
||
/* Look for reg short form */
|
||
if ((ins & R) && (rm & 0xC0) == 0xC0)
|
||
{ switch (op)
|
||
{ case 0xC6: op = 0xB0 + ereg; break;
|
||
case 0xC7: op = 0xB8 + ereg; break;
|
||
case 0xFF:
|
||
switch (reg)
|
||
{ case 6<<3: op = 0x50+ereg; break;/* PUSH*/
|
||
case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
|
||
case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
|
||
}
|
||
break;
|
||
case 0x8F: op = 0x58 + ereg; break;
|
||
case 0x87:
|
||
if (reg == 0) op = 0x90 + ereg;
|
||
break;
|
||
}
|
||
c->Iop = op;
|
||
}
|
||
|
||
// Look to replace SHL reg,1 with ADD reg,reg
|
||
if ((op & ~1) == 0xD0 &&
|
||
(rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
|
||
config.target_cpu >= TARGET_80486)
|
||
{
|
||
c->Iop &= 1;
|
||
c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3);
|
||
if (c->Irex & REX_B)
|
||
c->Irex |= REX_R;
|
||
if (!(c->Iflags & CFpsw) && !I16)
|
||
c->Iflags &= ~CFopsize;
|
||
goto L1;
|
||
}
|
||
|
||
/* Look for sign extended modregrm displacement, or 0
|
||
* displacement.
|
||
*/
|
||
|
||
if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp
|
||
c->IFL1 == FLconst) // and it's a constant
|
||
{
|
||
a = c->IEVpointer1;
|
||
if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp]
|
||
!(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP)
|
||
)
|
||
c->Irm &= 0x3F;
|
||
else if (!I16)
|
||
{
|
||
if ((targ_size_t)(targ_schar)a == a)
|
||
c->Irm ^= 0xC0; /* do 8 sx */
|
||
}
|
||
else if (((targ_size_t)(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
|
||
c->Irm ^= 0xC0; /* do 8 sx */
|
||
}
|
||
|
||
/* Look for LEA reg,[ireg], replace with MOV reg,ireg */
|
||
else if (op == 0x8D)
|
||
{ rm = c->Irm & 7;
|
||
mod = c->Irm & modregrm(3,0,0);
|
||
if (mod == 0)
|
||
{
|
||
if (!I16)
|
||
{
|
||
switch (rm)
|
||
{
|
||
case 4:
|
||
case 5:
|
||
break;
|
||
default:
|
||
c->Irm |= modregrm(3,0,0);
|
||
c->Iop = 0x8B;
|
||
break;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
switch (rm)
|
||
{
|
||
case 4: rm = modregrm(3,0,SI); goto L6;
|
||
case 5: rm = modregrm(3,0,DI); goto L6;
|
||
case 7: rm = modregrm(3,0,BX); goto L6;
|
||
L6: c->Irm = rm + reg;
|
||
c->Iop = 0x8B;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
/* replace LEA reg,0[BP] with MOV reg,BP */
|
||
else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
|
||
c->IFL1 == FLconst && c->IEVpointer1 == 0)
|
||
{ c->Iop = 0x8B; /* MOV reg,BP */
|
||
c->Irm = modregrm(3,0,BP) + reg;
|
||
}
|
||
}
|
||
|
||
// Replace [R13] with 0[R13]
|
||
if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5))
|
||
{
|
||
c->Irm |= modregrm(1,0,0);
|
||
c->IFL1 = FLconst;
|
||
c->IEVpointer1 = 0;
|
||
}
|
||
}
|
||
else if (!(c->Iflags & CFvex))
|
||
{
|
||
switch (op)
|
||
{
|
||
// Opcodes 0x70..0x7F fall through into the JMP handling; anything
// else leaves the switch
default:
|
||
if ((op & ~0x0F) != 0x70)
|
||
break;
|
||
case JMP:
|
||
switch (c->IFL2)
|
||
{ case FLcode:
|
||
// jump to the very next instruction
if (c->IEV2.Vcode == code_next(c))
|
||
{ c->Iop = NOP;
|
||
continue;
|
||
}
|
||
break;
|
||
case FLblock:
|
||
// last instruction of the block jumping to the following block
if (!code_next(c) && c->IEV2.Vblock == bn)
|
||
{ c->Iop = NOP;
|
||
continue;
|
||
}
|
||
break;
|
||
case FLconst:
|
||
case FLfunc:
|
||
case FLextern:
|
||
break;
|
||
default:
|
||
#ifdef DEBUG
|
||
WRFL((enum FL)c->IFL2);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
break;
|
||
|
||
case 0x68: // PUSH immed16
|
||
if (c->IFL2 == FLconst)
|
||
{
|
||
targ_long u = c->IEV2.Vuns;
|
||
if (I64 ||
|
||
((c->Iflags & CFopsize) ? I16 : I32))
|
||
{ // PUSH 32/64 bit operand
|
||
if (u == (signed char) u)
|
||
c->Iop = 0x6A; // PUSH immed8
|
||
}
|
||
else // PUSH 16 bit operand
|
||
{ if ((short)u == (signed char) u)
|
||
c->Iop = 0x6A; // PUSH immed8
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
#if 0
|
||
if (1 || debugc) {
|
||
printf("-pinholeopt(%p)\n",cstart);
|
||
for (c = cstart; c; c = code_next(c))
|
||
c->print();
|
||
}
|
||
#endif
|
||
}
|
||
|
||
#ifdef DEBUG
|
||
/* Table-driven self test for pinholeopt(). Each row pairs an input
 * instruction with the instruction expected after optimization.
 * A non-zero 'model' on the input restricts the row to that memory
 * model (16, 32 or 64); rows for other models are skipped.
 */
STATIC void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");
    struct CS { unsigned model,op,ea,ev1,ev2,flags; } tests[][2] =
    {
        // XOR reg,immed                            NOT regL
        {{ 16,0x81,modregrm(3,6,BX),0,0xFF,0 },    { 0,0xF6,modregrm(3,2,BX),0,0xFF }},

        // MOV 0[BX],3                               MOV [BX],3
        {{ 16,0xC7,modregrm(2,0,7),0,3},            { 0,0xC7,modregrm(0,0,7),0,3 }},

#if 0   // only if config.flags4 & CFG4space
        // TEST regL,immed8
        {{ 0,0xF6,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }},
        {{ 0,0xF7,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }},
        {{ 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }},
        {{ 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }},
#endif

        // PUSH immed => PUSH immed8
        {{ 0,0x68,0,0,0 },    { 0,0x6A,0,0,0 }},
        {{ 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }},
        {{ 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }},
        {{ 16,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }},
        {{ 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }},
        {{ 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }},
        {{ 16,0x68,0,0,0x10000,0 },     { 0,0x6A,0,0,0x10000,0 }},
        {{ 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }},
        {{ 32,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }},
        {{ 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }},
        {{ 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }},
        {{ 32,0x68,0,0,0x10000,CFopsize },    { 0,0x6A,0,0,0x10000,CFopsize }},
        {{ 32,0x68,0,0,0x8000,CFopsize },     { 0,0x68,0,0,0x8000,CFopsize }},
    };
    const int ntests = sizeof(tests)/sizeof(tests[0]);

    //config.flags4 |= CFG4space;
    for (int i = 0; i < ntests; i++)
    {
        const CS &input = tests[i][0];
        const CS &expected = tests[i][1];

        /* Skip rows that were written for a different memory model. */
        if (input.model &&
            ((I16 && input.model != 16) ||
             (I32 && input.model != 32) ||
             (I64 && input.model != 64)))
            continue;

        //printf("[%d]\n", i);

        /* Build a zeroed instruction carrying the input fields. */
        code cs;
        memset(&cs, 0, sizeof(cs));
        cs.Iop = input.op;
        cs.Iea = input.ea;
        cs.IFL1 = FLconst;
        cs.IFL2 = FLconst;
        cs.IEV1.Vuns = input.ev1;
        cs.IEV2.Vuns = input.ev2;
        cs.Iflags = input.flags;

        pinholeopt(&cs, NULL);

        /* Report the opcode mismatch before asserting, to ease diagnosis. */
        if (cs.Iop != expected.op)
        {
            printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, expected.op);
            assert(0);
        }
        assert(cs.Iea == expected.ea);
        assert(cs.IEV1.Vuns == expected.ev1);
        assert(cs.IEV2.Vuns == expected.ev2);
        assert(cs.Iflags == expected.flags);
    }
}
|
||
#endif
|
||
|
||
/**************************
|
||
* Compute jump addresses for FLcode.
|
||
* Note: only works for forward referenced code.
|
||
* only direct jumps and branches are detected.
|
||
* LOOP instructions only work for backward refs.
|
||
*/
|
||
|
||
void jmpaddr(code *c)
{
    code *const head = c;               /* start of code, for backward searches */

    //printf("jmpaddr()\n");
    for (; c; c = code_next(c))
    {
        const unsigned op = c->Iop;
        int backref;                    /* !=0: target must be searched for backwards */

        if (op <= 0xEB &&
            inssize[op] & T &&          // if second operand
            c->IFL2 == FLcode &&
            ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
        {
            code *const target = c->IEV2.Vcode;
            code *scan = code_next(c);
            targ_size_t disp = 0;       /* IP displacement */

            /* Add up the sizes of the instructions between c and the target. */
            for (; scan && scan != target; scan = code_next(scan))
                disp += calccodsize(scan);

            /* Running off the end means the target was not found ahead of c. */
            backref = !scan;
            if (!backref)
            {
                /* Only a 16 bit conditional branch flagged CFjmp16 needs
                 * the "branch around a long jump" sequence.
                 */
                if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL ||
                    !(c->Iflags & CFjmp16))
                {
                    c->IEVpointer2 = disp;
                }
                else
                {
                    /* Splice a fresh instruction in right after c. */
                    code *const follower = code_next(c);
                    code_next(c) = code_calloc();
                    code_next(code_next(c)) = follower;

                    c->Iop = op ^ 1;            /* converse jmp */
                    c->Iflags &= ~CFjmp16;
                    c->IEVpointer2 = I16 ? 3 : 5;

                    code *const farjmp = code_next(c);
                    farjmp->Iop = JMP;          /* long jump */
                    farjmp->IFL2 = FLconst;
                    farjmp->IEVpointer2 = disp;
                }
                c->IFL2 = FLconst;
            }
        }
        else
        {
            /* LOOP instructions are always handled as backward references. */
            backref = (op == LOOP && c->IFL2 == FLcode);
        }

        if (backref)
        {
            code *const target = c->IEV2.Vcode;
            code *scan;
            targ_size_t disp;

            /* The target has to show up before c when walking from the start. */
            for (scan = head; scan != target; scan = code_next(scan))
            {
                if (!scan || scan == c)
                    assert(0);
            }

            disp = 2;                   /* - IP displacement */
            while (scan != c)
            {
                assert(scan);
                disp += calccodsize(scan);
                scan = code_next(scan);
            }
            c->IEVpointer2 = (-disp) & 0xFF;
            c->IFL2 = FLconst;
        }
    }
}
|
||
|
||
/*******************************
|
||
* Calculate bl->Bsize.
|
||
*/
|
||
|
||
unsigned calcblksize(code *c)
{
    /* Sum the encoded size of every instruction in the list. */
    unsigned total = 0;

    while (c)
    {
        //printf("off=%02x, sz = %d, code %p: op=%02x\n", total, calccodsize(c), c, c->Iop);
        total += calccodsize(c);
        c = code_next(c);
    }
    //printf("calcblksize(c = x%x) = %d\n", c, total);
    return total;
}
|
||
|
||
/*****************************
|
||
* Calculate and return code size of a code.
|
||
* Note that NOPs are sometimes used as markers, but are
|
||
* never output. LINNUMs are never output.
|
||
* Note: This routine must be fast. Profiling shows it is significant.
|
||
*/
|
||
|
||
// Returns the number of bytes instruction c occupies. NOP and ESCAPE
// pseudo-ops return 0. The base size comes from the inssize tables and is
// then adjusted for prefixes, the modregrm/SIB displacement and REX.
unsigned calccodsize(code *c)
|
||
{ unsigned size;
|
||
unsigned op;
|
||
unsigned char rm,mod,ins;
|
||
unsigned iflags;
|
||
// i32: operands default to 32 bits; a32: addresses default to 32 bits
// (a32 is toggled below when an address size prefix is present)
unsigned i32 = I32 || I64;
|
||
unsigned a32 = i32;
|
||
|
||
#ifdef DEBUG
|
||
assert((a32 & ~1) == 0);
|
||
#endif
|
||
iflags = c->Iflags;
|
||
op = c->Iop;
|
||
if (iflags & CFvex)
|
||
{
|
||
ins = vex_inssize(c);
|
||
size = ins & 7;
|
||
goto Lmodrm;
|
||
}
|
||
// Multi-byte 0F opcodes are all dispatched through case 0x0F below
else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
|
||
op = 0x0F;
|
||
else
|
||
op &= 0xFF;
|
||
switch (op)
|
||
{
|
||
case 0x0F:
|
||
if ((c->Iop & 0xFFFD00) == 0x0F3800)
|
||
{ // 3 byte op ( 0F38-- or 0F3A-- )
|
||
ins = inssize2[(c->Iop >> 8) & 0xFF];
|
||
size = ins & 7;
|
||
// one more byte when the opcode carries a leading byte above the 0F38/0F3A
if (c->Iop & 0xFF000000)
|
||
size++;
|
||
}
|
||
else
|
||
{ // 2 byte op ( 0F-- )
|
||
ins = inssize2[c->Iop & 0xFF];
|
||
size = ins & 7;
|
||
if (c->Iop & 0xFF0000)
|
||
size++;
|
||
}
|
||
break;
|
||
|
||
case NOP:
|
||
case ESCAPE:
|
||
size = 0; // since these won't be output
|
||
goto Lret2;
|
||
|
||
case ASM:
|
||
if (c->Iflags == CFaddrsize) // kludge for DA inline asm
|
||
size = NPTRSIZE;
|
||
else
|
||
size = c->IEV1.as.len;
|
||
goto Lret2;
|
||
|
||
case 0xA1:
|
||
case 0xA3:
|
||
if (c->Irex)
|
||
{
|
||
size = 9; // 64 bit immediate value for MOV to/from RAX
|
||
goto Lret;
|
||
}
|
||
goto Ldefault;
|
||
|
||
case 0xF6: /* TEST mem8,immed8 */
|
||
ins = inssize[op];
|
||
size = ins & 7;
|
||
if (i32)
|
||
size = inssize32[op];
|
||
// only the /0 form (TEST) carries an immediate
if ((c->Irm & (7<<3)) == 0)
|
||
size++; /* size of immed8 */
|
||
break;
|
||
|
||
case 0xF7:
|
||
ins = inssize[op];
|
||
size = ins & 7;
|
||
if (i32)
|
||
size = inssize32[op];
|
||
// immediate is 4 bytes when the effective operand size is 32 bits
// (default size flipped by an operand size prefix), else 2
if ((c->Irm & (7<<3)) == 0)
|
||
size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2;
|
||
break;
|
||
|
||
default:
|
||
Ldefault:
|
||
ins = inssize[op];
|
||
size = ins & 7;
|
||
if (i32)
|
||
size = inssize32[op];
|
||
}
|
||
|
||
if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
|
||
{
|
||
if (iflags & CFwait) // if add FWAIT prefix
|
||
size++;
|
||
if (iflags & CFSEG) // if segment override
|
||
size++;
|
||
|
||
// If the instruction has a second operand that is not an 8 bit,
|
||
// and the operand size prefix is present, then fix the size computation
|
||
// because the operand size will be different.
|
||
// Walter, I had problems with this bit at the end. There can still be
|
||
// an ADDRSIZE prefix for these and it does indeed change the operand size.
|
||
|
||
if (iflags & (CFopsize | CFaddrsize))
|
||
{
|
||
if ((ins & (T|E)) == T)
|
||
{
|
||
// opcodes A0..A3: the operand is an address, so its size follows
// the address size prefix rather than the operand size prefix
if ((op & 0xAC) == 0xA0)
|
||
{
|
||
if (iflags & CFaddrsize && !I64)
|
||
{ if (I32)
|
||
size -= 2;
|
||
else
|
||
size += 2;
|
||
}
|
||
}
|
||
else if (iflags & CFopsize)
|
||
{ if (I16)
|
||
size += 2;
|
||
else
|
||
size -= 2;
|
||
}
|
||
}
|
||
if (iflags & CFaddrsize)
|
||
{ if (!I64)
|
||
a32 ^= 1;
|
||
size++;
|
||
}
|
||
if (iflags & CFopsize)
|
||
size++; /* +1 for OPSIZE prefix */
|
||
}
|
||
}
|
||
|
||
Lmodrm:
|
||
if ((op & ~0x0F) == 0x70)
|
||
{ if (iflags & CFjmp16) // if long branch
|
||
size += I16 ? 3 : 4; // + 3(4) bytes for JMP
|
||
}
|
||
else if (ins & M) // if modregrm byte
|
||
{
|
||
rm = c->Irm;
|
||
mod = rm & 0xC0;
|
||
if (a32 || I64)
|
||
{ // 32 bit addressing
|
||
if (issib(rm))
|
||
size++;
|
||
switch (mod)
|
||
{ case 0:
|
||
if (issib(rm) && (c->Isib & 7) == 5 ||
|
||
(rm & 7) == 5)
|
||
size += 4; /* disp32 */
|
||
if (c->Irex & REX_B && (rm & 7) == 5)
|
||
/* Instead of selecting R13, this mode is an [RIP] relative
|
||
* address. Although valid, it's redundant, and should not
|
||
* be generated. Instead, generate 0[R13] instead of [R13].
|
||
*/
|
||
assert(0);
|
||
break;
|
||
case 0x40:
|
||
size++; /* disp8 */
|
||
break;
|
||
case 0x80:
|
||
size += 4; /* disp32 */
|
||
break;
|
||
}
|
||
}
|
||
else
|
||
{ // 16 bit addressing
|
||
if (mod == 0x40) /* 01: 8 bit displacement */
|
||
size++;
|
||
else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
|
||
size += 2;
|
||
}
|
||
}
|
||
|
||
Lret:
|
||
// A REX prefix costs one byte; with REX_W, opcodes B8..BF take 4 more
// bytes of immediate
if (!(iflags & CFvex) && c->Irex)
|
||
{ size++;
|
||
if (c->Irex & REX_W && (op & ~7) == 0xB8)
|
||
size += 4;
|
||
}
|
||
Lret2:
|
||
//printf("op = x%02x, size = %d\n",op,size);
|
||
return size;
|
||
}
|
||
|
||
/********************************
|
||
* Return !=0 if codes match.
|
||
*/
|
||
|
||
#if 0
|
||
|
||
/* Compare two instructions field by field.
 * Params:
 *      c1, c2 = instructions to compare
 * Returns:
 *      1 if they match, 0 if not.
 * ESCctor/ESCdtor pseudo-ops never match, NOPs and all other ESCAPE
 * pseudo-ops with equal opcodes always match, and ASM blocks match
 * when their byte sequences are identical.
 */
int code_match(code *c1,code *c2)
{   code cs1,cs2;
    unsigned char ins;
    unsigned char rm;

    if (c1 == c2)
        goto match;
    cs1 = *c1;
    cs2 = *c2;
    if (cs1.Iop != cs2.Iop)
        goto nomatch;
    switch (cs1.Iop)
    {
        case ESCAPE | ESCctor:
        case ESCAPE | ESCdtor:
            goto nomatch;

        case NOP:
            goto match;

        case ASM:
            /* Same length and same bytes (the length member is IEV1.as.len) */
            if (cs1.IEV1.as.len == cs2.IEV1.as.len &&
                memcmp(cs1.IEV1.as.bytes,cs2.IEV1.as.bytes,cs1.IEV1.as.len) == 0)
                goto match;
            else
                goto nomatch;

        default:
            if ((cs1.Iop & 0xFF) == ESCAPE)
                goto match;
            break;
    }
    if (cs1.Iflags != cs2.Iflags)
        goto nomatch;

    /* Pick the attribute byte from the table matching the opcode form */
    ins = inssize[cs1.Iop & 0xFF];
    if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
    {
        ins = inssize2[(cs1.Iop >> 8) & 0xFF];
    }
    else if ((cs1.Iop & 0xFF00) == 0x0F00)
    {
        ins = inssize2[cs1.Iop & 0xFF];
    }

    if (ins & M)                // if modregrm byte
    {
        /* Both instructions carry the same modregrm byte past this check,
         * so decode the addressing mode from cs1 alone.
         */
        rm = cs1.Irm;
        if (rm != cs2.Irm)
            goto nomatch;
        if ((rm & 0xC0) == 0xC0)
            goto do2;
        if (is32bitaddr(I32,cs1.Iflags))
        {
            if (issib(rm) && cs1.Isib != cs2.Isib)
                goto nomatch;
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (cs1.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        if (cs1.IFL1 != cs2.IFL1)
            goto nomatch;
        if (flinsymtab[cs1.IFL1] && cs1.IEVsym1 != cs2.IEVsym1)
            goto nomatch;
        if (cs1.IEVoffset1 != cs2.IEVoffset1)
            goto nomatch;
    }

do2:
    if (!(ins & T))                     // if no second operand
        goto match;
    if (cs1.IFL2 != cs2.IFL2)
        goto nomatch;
    if (flinsymtab[cs1.IFL2] && cs1.IEVsym2 != cs2.IEVsym2)
        goto nomatch;
    if (cs1.IEVoffset2 != cs2.IEVoffset2)
        goto nomatch;

match:
    return 1;

nomatch:
    return 0;
}
|
||
|
||
#endif
|
||
|
||
/**************************
|
||
* Write code to intermediate file.
|
||
* Code starts at offset.
|
||
* Returns:
|
||
* addr of end of code
|
||
*/
|
||
|
||
static targ_size_t offset; /* to save code use a global */
|
||
static char bytes[100];
|
||
static char *pgen;
|
||
|
||
#define GEN(c) (*pgen++ = (c))
|
||
#define GENP(n,p) (memcpy(pgen,(p),(n)), pgen += (n))
|
||
#if ELFOBJ || MACHOBJ || _MSC_VER
|
||
#define FLUSH() if (pgen-bytes) cod3_flush()
|
||
#else
|
||
#define FLUSH() ((pgen - bytes) && cod3_flush())
|
||
#endif
|
||
#define OFFSET() (offset + (pgen - bytes))
|
||
|
||
/* Write the bytes buffered in bytes[] (up to pgen) to the code segment
 * at the current offset, advance offset by the amount obj_bytes()
 * reports, and reset the buffer to empty.
 */
STATIC void cod3_flush()
{
#ifdef DEBUG
    /* The write pointer must still lie inside the buffer. */
    assert(pgen - bytes < sizeof(bytes));
#endif

    offset += obj_bytes(cseg,
                        offset,
                        pgen - bytes,
                        bytes);

    /* Rewind the write pointer to the start of the buffer. */
    pgen = bytes;
}
|
||
|
||
/* Walk the instruction list starting at c and emit each instruction:
 * prefixes, opcode bytes, mod r/m + SIB, then the first and second
 * operands.  Emission starts at Coffset; on exit Coffset is updated to
 * the end of the emitted code and that value is returned.
 *
 * Bytes are staged in bytes[] via GEN()/GENP() and flushed with FLUSH();
 * relocatable operands are written by do8bit/do16bit/do32bit/do64bit.
 */
unsigned codout(code *c)
{
    unsigned op;                // opcode of the current instruction
    unsigned flags;             // copy of c->Iflags; narrowed before operand 2
    unsigned char ins;          // inssize[] attribute bits (M, T, E) for op
    unsigned char rm,mod;
    code *cn;
    symbol *s;

#ifdef DEBUG
    if (debugc)
        printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset);
#endif

    pgen = bytes;
    offset = Coffset;
    for (; c; c = code_next(c))
    {
#ifdef DEBUG
        if (debugc)
        {
            printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c));
            c->print();
        }
        unsigned startoffset = OFFSET();
#endif
        op = c->Iop;
        ins = inssize[op & 0xFF];

        // Pseudo-instructions that do not go through the normal encoder
        switch (op & 0xFF)
        {
            case ESCAPE:
                /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
                if (op == 0x660F383E || c->Iflags & CFvex)
                    break;

                switch (op & 0xFFFF00)
                {
                    case ESClinnum:
                        /* put out line number stuff */
                        objlinnum(c->IEV1.Vsrcpos,OFFSET());
                        break;
#if SCPP
#if 1
                    case ESCctor:
                    case ESCdtor:
                    case ESCoffset:
                        if (config.exe != EX_NT)
                            except_pair_setoffset(c,OFFSET() - funcoffset);
                        break;
                    case ESCmark:
                    case ESCrelease:
                    case ESCmark2:
                    case ESCrelease2:
                        break;
#else
                    case ESCctor:
                        except_push(OFFSET() - funcoffset,c->IEV1.Vtor,NULL);
                        break;
                    case ESCdtor:
                        except_pop(OFFSET() - funcoffset,c->IEV1.Vtor,NULL);
                        break;
                    case ESCmark:
                        except_mark();
                        break;
                    case ESCrelease:
                        except_release();
                        break;
#endif
#endif
                }
#ifdef DEBUG
                assert(calccodsize(c) == 0);
#endif
                continue;

            case NOP:                   /* don't send them out */
                if (op != NOP)
                    break;
#ifdef DEBUG
                assert(calccodsize(c) == 0);
#endif
                continue;

            case ASM:
                if (op != ASM)
                    break;
                FLUSH();
                if (c->Iflags == CFaddrsize)    // kludge for DA inline asm
                    do32bit(FLblockoff,&c->IEV1,0);
                else
                    offset += obj_bytes(cseg,offset,c->IEV1.as.len,c->IEV1.as.bytes);
#ifdef DEBUG
                assert(calccodsize(c) == c->IEV1.as.len);
#endif
                continue;
        }
        flags = c->Iflags;

        // See if we need to flush (don't have room for largest code sequence)
        if (pgen - bytes > sizeof(bytes) - (1+4+4+8+8))
        {
            FLUSH();
        }

        // see if we need to put out prefix bytes
        if (flags & (CFwait | CFPREFIX | CFjmp16))
        {
            if (flags & CFwait)
                GEN(0x9B);                      // FWAIT

            // Segment override prefix, if one is requested
            switch (flags & CFSEG)
            {
                case CFes:  GEN(SEGES);  break;
                case CFss:  GEN(SEGSS);  break;
                case CFcs:  GEN(SEGCS);  break;
                case CFds:  GEN(SEGDS);  break;
                case CFfs:  GEN(SEGFS);  break;
                case CFgs:  GEN(SEGGS);  break;
            }

            if (flags & CFaddrsize)
                GEN(0x67);

            // Do this last because of instructions like ADDPD
            if (flags & CFopsize)
                GEN(0x66);                      /* operand size */

            if (((op & ~0x0F) == 0x70) && (flags & CFjmp16)) /* long condit jmp */
            {
                if (!I16)
                {
                    // Put out 16 bit conditional jump
                    op = 0x0F00 | (0x80 | (op & 0x0F));
                    c->Iop = op;
                }
                else
                {
                    // Insert a JMP to the target after c, and make c a
                    // short jump on the opposite condition over that JMP
                    cn = code_calloc();
                    /*cxcalloc++;*/
                    code_next(cn) = code_next(c);
                    code_next(c) = cn;          // link into code
                    cn->Iop = JMP;              // JMP block
                    cn->IFL2 = c->IFL2;
                    cn->IEV2.Vblock = c->IEV2.Vblock;
                    op ^= 1;                    // toggle condition
                    c->Iop = op;
                    c->IFL2 = FLconst;
                    c->IEVpointer2 = I16 ? 3 : 5; // skip over JMP block
                    c->Iflags &= ~CFjmp16;
                }
            }
        }

        if (flags & CFvex)
        {
            if (flags & CFvex3)
            {
                // 3 byte VEX prefix
                GEN(0xC4);
                GEN(VEX3_B1(c->Ivex));
                GEN(VEX3_B2(c->Ivex));
                GEN(c->Ivex.op);
            }
            else
            {
                // 2 byte VEX prefix
                GEN(0xC5);
                GEN(VEX2_B1(c->Ivex));
                GEN(c->Ivex.op);
            }
            ins = vex_inssize(c);
            goto Lmodrm;
        }

        if (op > 0xFF)
        {
            // Multi-byte opcode: pick attribute bits from the second table
            if ((op & 0xFFFD00) == 0x0F3800)
                ins = inssize2[(op >> 8) & 0xFF];
            else if ((op & 0xFF00) == 0x0F00)
                ins = inssize2[op & 0xFF];

            if (op & 0xFF000000)
            {
                // 4 opcode bytes; REX goes after an F2/F3/66 lead byte,
                // otherwise before the lead byte
                unsigned char lead = op >> 24;
                if (lead == 0xF2 || lead == 0xF3 || lead == 0x66)
                {
                    GEN(lead);
                    if (c->Irex)
                        GEN(c->Irex | REX);
                }
                else
                {
                    if (c->Irex)
                        GEN(c->Irex | REX);
                    GEN(lead);
                }
                GEN((op >> 16) & 0xFF);
                GEN((op >> 8) & 0xFF);
                GEN(op & 0xFF);
            }
            else if (op & 0xFF0000)
            {
                // 3 opcode bytes; same REX placement rule
                unsigned char lead = op >> 16;
                if (lead == 0xF2 || lead == 0xF3 || lead == 0x66)
                {
                    GEN(lead);
                    if (c->Irex)
                        GEN(c->Irex | REX);
                }
                else
                {
                    if (c->Irex)
                        GEN(c->Irex | REX);
                    GEN(lead);
                }
                GEN((op >> 8) & 0xFF);
                GEN(op & 0xFF);
            }
            else
            {
                // 2 opcode bytes
                if (c->Irex)
                    GEN(c->Irex | REX);
                GEN((op >> 8) & 0xFF);
                GEN(op & 0xFF);
            }
        }
        else
        {
            // Single opcode byte
            if (c->Irex)
                GEN(c->Irex | REX);
            GEN(op);
        }

      Lmodrm:
        if (ins & M)            /* if modregrm byte */
        {
            rm = c->Irm;
            GEN(rm);

            // Look for an address size override when working with the
            // MOD R/M and SIB bytes
            if (is32bitaddr(I32,flags))
            {
                if (issib(rm))
                    GEN(c->Isib);
                switch (rm & 0xC0)
                {
                    case 0x40:
                        do8bit((enum FL) c->IFL1,&c->IEV1);     // 8 bit
                        break;

                    case 0:
                        // mod==0 carries a displacement only for these forms
                        if (!((issib(rm) && (c->Isib & 7) == 5) ||
                              (rm & 7) == 5))
                            break;
                        /* fall through */
                    case 0x80:
                    {
                        int dispflags = CFoff;
                        targ_size_t adjust = 0;
                        if (I64 &&
                            (rm & modregrm(3,0,7)) == modregrm(0,0,5)) // if disp32[RIP]
                        {
                            dispflags |= CFpc32;
                            adjust = -4;
                            unsigned regfield = rm & modregrm(0,7,0);
                            if (ins & T ||
                                ((op == 0xF6 || op == 0xF7) &&
                                 (regfield == modregrm(0,0,0) || regfield == modregrm(0,1,0))))
                            {
                                if (ins & E)
                                    adjust = -5;
                                else if (c->Iflags & CFopsize)
                                    adjust = -6;
                                else
                                    adjust = -8;
                            }
#if TARGET_OSX
                            // Mach-O linkage already takes the 4 byte size into account
                            adjust += 4;
#endif
                        }
                        do32bit((enum FL)c->IFL1,&c->IEV1,dispflags,adjust);
                        break;
                    }
                }
            }
            else
            {
                switch (rm & 0xC0)
                {
                    case 0x40:
                        do8bit((enum FL) c->IFL1,&c->IEV1);     // 8 bit
                        break;

                    case 0:
                        if ((rm & 7) != 6)
                            break;
                        /* fall through */
                    case 0x80:
                        do16bit((enum FL)c->IFL1,&c->IEV1,CFoff);
                        break;
                }
            }
        }
        else
        {
            if (op == 0xC8)
                do16bit((enum FL)c->IFL1,&c->IEV1,0);
        }

        flags &= CFseg | CFoff | CFselfrel;
        if (ins & T)                    /* if second operand */
        {
            if (ins & E)                /* if data-8 */
            {
                do8bit((enum FL) c->IFL2,&c->IEV2);
            }
            else if (!I16)
            {
                // NOTE: the labels in this switch are also goto targets of
                // the I16 switch below, and vice versa.
                switch (op)
                {
                    case 0xC2:              /* RETN imm16 */
                    case 0xCA:              /* RETF imm16 */
                    do16:
                        do16bit((enum FL)c->IFL2,&c->IEV2,flags);
                        break;

                    case 0xA1:
                    case 0xA3:
                        if (I64 && c->Irex)
                        {
                    do64:
                            do64bit((enum FL)c->IFL2,&c->IEV2,flags);
                            break;
                        }
                        /* fall through */
                    case 0xA0:              /* MOV AL,byte ptr [] */
                    case 0xA2:
                        if ((c->Iflags & CFaddrsize) && !I64)
                            goto do16;
                    do32:
                        do32bit((enum FL)c->IFL2,&c->IEV2,flags);
                        break;

                    case 0x9A:
                    case 0xEA:
                        if (c->Iflags & CFopsize)
                            goto ptr1616;
                        goto ptr1632;

                    case 0x68:              // PUSH immed32
                        if ((enum FL)c->IFL2 != FLblock)
                            goto case_default;
                        c->IFL2 = FLblockoff;
                        goto do32;

                    case CALL:              // CALL rel
                    case JMP:               // JMP rel
                        flags |= CFselfrel;
                        goto case_default;

                    default:
                        if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32
                            flags |= CFselfrel;
                        if (I64 && (op & ~7) == 0xB8 && (c->Irex & REX_W))
                            goto do64;
                    case_default:
                        if (c->Iflags & CFopsize)
                            goto do16;
                        goto do32;
                }
            }
            else
            {
                switch (op)
                {
                    case 0xC2:
                    case 0xCA:
                        goto do16;

                    case 0xA0:
                    case 0xA1:
                    case 0xA2:
                    case 0xA3:
                        if (c->Iflags & CFaddrsize)
                            goto do32;
                        goto do16;

                    case 0x9A:
                    case 0xEA:
                        if (c->Iflags & CFopsize)
                            goto ptr1632;
                        goto ptr1616;

                    ptr1616:
                    ptr1632:
                        //assert(c->IFL2 == FLfunc);
                        FLUSH();
                        if (c->IFL2 == FLdatseg)
                        {
                            reftodatseg(cseg,offset,c->IEVpointer2,
                                    c->IEVseg2,flags);
                            offset += 4;
                        }
                        else
                        {
                            s = c->IEVsym2;
                            offset += reftoident(cseg,offset,s,0,flags);
                        }
                        break;

                    case 0x68:              // PUSH immed16
                        if ((enum FL)c->IFL2 != FLblock)
                            goto case_default16;
                        c->IFL2 = FLblockoff;
                        goto do16;

                    case CALL:
                    case JMP:
                        flags |= CFselfrel;
                        /* fall through */
                    default:
                    case_default16:
                        if (c->Iflags & CFopsize)
                            goto do32;
                        goto do16;
                }
            }
        }
        else if (op == 0xF6)            /* TEST mem8,immed8 */
        {
            if ((rm & (7<<3)) == 0)
                do8bit((enum FL)c->IFL2,&c->IEV2);
        }
        else if (op == 0xF7)
        {
            if ((rm & (7<<3)) == 0)     /* TEST mem16/32,immed16/32 */
            {
                if ((I32 || I64) ^ ((c->Iflags & CFopsize) != 0))
                    do32bit((enum FL)c->IFL2,&c->IEV2,flags);
                else
                    do16bit((enum FL)c->IFL2,&c->IEV2,flags);
            }
        }
#ifdef DEBUG
        // Emitted size must agree with what calccodsize() predicts
        if (OFFSET() - startoffset != calccodsize(c))
        {
            printf("actual: %d, calc: %d\n", (int)(OFFSET() - startoffset), (int)calccodsize(c));
            c->print();
            assert(0);
        }
#endif
    }
    FLUSH();
    Coffset = offset;
    //printf("-codout(), Coffset = x%x\n", Coffset);
    return offset;                      /* ending address */
}
|
||
|
||
|
||
/* Emit an 8 byte operand of kind fl described by *uev.
 *
 * Constant-like kinds are appended to the staging buffer with GENP().
 * Kinds that need a fixup flush the buffer, hand the reference to
 * reftodatseg()/reftocodseg()/reftoident() at `offset`, and then advance
 * `offset` by 8.
 *
 * Params:
 *      fl    = operand kind
 *      uev   = operand value
 *      flags = CFxxx flags passed on to the fixup routines
 */
STATIC void do64bit(enum FL fl,union evc *uev,int flags)
{
    symbol *s;
    targ_size_t ad;

    assert(I64);
    switch (fl)
    {
        case FLconst:
            ad = * (targ_size_t *) uev;
            GENP(8,&ad);
            return;

        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,CFoffset64 | flags);
            break;

        case FLframehandler:
            // Remember where the placeholder is; a zero is emitted for now
            framehandleroffset = OFFSET();
            ad = 0;
            GENP(8,&ad);
            return;

        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;

#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#if DEBUG
            symbol_print(uev->sp.Vsym);
#endif
#endif
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external which is the start of .rodata or .data
        case FLextern:                      /* external data symbol */
        case FLtlsdata:
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
        case FLgot:
        case FLgotoff:
#endif
            FLUSH();
            s = uev->sp.Vsym;               /* symbol pointer */
            reftoident(cseg,offset,s,uev->sp.Voffset,CFoffset64 | flags);
            break;

#if TARGET_OSX
        case FLgot:
            funcsym_p->Slocalgotoffset = OFFSET();
            ad = 0;
            GENP(8,&ad);
            return;
#endif

        case FLfunc:                        /* function call */
            s = uev->sp.Vsym;               /* symbol pointer */
            assert(TARGET_SEGMENTED || !tyfarfunc(s->ty()));
            FLUSH();
            reftoident(cseg,offset,s,0,CFoffset64 | flags);
            break;

        case FLblock:                       /* displacement to another block */
            // NOTE(review): subtracts 4 although 8 bytes are emitted; this
            // mirrors do32bit() - confirm whether this case is ever reached
            ad = uev->Vblock->Boffset - OFFSET() - 4;
            //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad);
            GENP(8,&ad);
            return;

        case FLblockoff:
            FLUSH();
            assert(uev->Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset);
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 8;        // account for the 8 bytes written by the fixup routine
}
|
||
|
||
|
||
/* Emit a 4 byte operand of kind fl described by *uev.
 *
 * Constant-like kinds are appended to the staging buffer with GENP().
 * Kinds that need a fixup flush the buffer, hand the reference to
 * reftodatseg()/reftocodseg()/reftoident() at `offset`, and then advance
 * `offset` by 4.
 *
 * Params:
 *      fl    = operand kind
 *      uev   = operand value
 *      flags = CFxxx flags passed on to the fixup routines
 *      val   = extra value added to the symbol offset for symbol references
 */
STATIC void do32bit(enum FL fl,union evc *uev,int flags, targ_size_t val)
{
    symbol *s;
    targ_size_t ad;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(sizeof(targ_size_t) == 4 || sizeof(targ_size_t) == 8);
            ad = * (targ_size_t *) uev;
            GENP(4,&ad);
            return;

        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,flags);
            break;

        case FLframehandler:
            // Remember where the placeholder is; a zero is emitted for now
            framehandleroffset = OFFSET();
            ad = 0;
            GENP(4,&ad);
            return;

        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;

#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#if DEBUG
            symbol_print(uev->sp.Vsym);
#endif
#endif
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external which is the start of .rodata or .data
        case FLextern:                      /* external data symbol */
        case FLtlsdata:
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
        case FLgot:
        case FLgotoff:
#endif
            FLUSH();
            s = uev->sp.Vsym;               /* symbol pointer */
            reftoident(cseg,offset,s,uev->sp.Voffset + val,flags);
            break;

#if TARGET_OSX
        case FLgot:
            funcsym_p->Slocalgotoffset = OFFSET();
            ad = 0;
            GENP(4,&ad);
            return;
#endif

        case FLfunc:                        /* function call */
            s = uev->sp.Vsym;               /* symbol pointer */
#if TARGET_SEGMENTED
            if (tyfarfunc(s->ty()))
            {   /* Large code references are always absolute */
                FLUSH();
                // The trailing "offset += 4" supplies the remaining 4 bytes
                offset += reftoident(cseg,offset,s,0,flags) - 4;
            }
            else if (s->Sseg == cseg &&
                     (s->Sclass == SCstatic || s->Sclass == SCglobal) &&
                     s->Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address */
                ad = s->Soffset - OFFSET() - 4;
                GENP(4,&ad);
                return;
            }
            else
#endif
            {
                assert(TARGET_SEGMENTED || !tyfarfunc(s->ty()));
                FLUSH();
                reftoident(cseg,offset,s,val,flags);
            }
            break;

        case FLblock:                       /* displacement to another block */
            ad = uev->Vblock->Boffset - OFFSET() - 4;
            //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad);
            GENP(4,&ad);
            return;

        case FLblockoff:
            FLUSH();
            assert(uev->Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset);
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 4;        // account for the 4 bytes written by the fixup routine
}
|
||
|
||
|
||
/* Emit a 2 byte operand of kind fl described by *uev.
 *
 * Constant-like kinds are appended to the staging buffer with GENP().
 * Kinds that need a fixup flush the buffer, hand the reference to
 * reftodatseg()/reftocodseg()/reftoident() at `offset`, and then advance
 * `offset` by 2.
 *
 * Params:
 *      fl    = operand kind
 *      uev   = operand value
 *      flags = CFxxx flags passed on to the fixup routines
 */
STATIC void do16bit(enum FL fl,union evc *uev,int flags)
{
    symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            GENP(2,(char *) uev);
            return;

        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,flags);
            break;

        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;

#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#endif
        case FLextern:                      /* external data symbol */
        case FLtlsdata:
            assert(SIXTEENBIT || TARGET_SEGMENTED);
            FLUSH();
            s = uev->sp.Vsym;               /* symbol pointer */
            reftoident(cseg,offset,s,uev->sp.Voffset,flags);
            break;

        case FLfunc:                        /* function call */
            assert(SIXTEENBIT || TARGET_SEGMENTED);
            s = uev->sp.Vsym;               /* symbol pointer */
            if (tyfarfunc(s->ty()))
            {   /* Large code references are always absolute */
                FLUSH();
                // The trailing "offset += 2" supplies the remaining 2 bytes
                offset += reftoident(cseg,offset,s,0,flags) - 2;
            }
            else if (s->Sseg == cseg &&
                     (s->Sclass == SCstatic || s->Sclass == SCglobal) &&
                     s->Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address */
                ad = s->Soffset - OFFSET() - 2;
                GENP(2,&ad);                // displacement
                return;
            }
            else
            {
                FLUSH();
                reftoident(cseg,offset,s,0,flags);
            }
            break;

        case FLblock:                       /* displacement to another block */
            ad = uev->Vblock->Boffset - OFFSET() - 2;
#ifdef DEBUG
            {
                // The displacement must fit in a signed 16 bit value
                targ_ptrdiff_t delta = uev->Vblock->Boffset - OFFSET() - 2;
                assert((signed short)delta == delta);
            }
#endif
            GENP(2,&ad);                    // displacement
            return;

        case FLblockoff:
            FLUSH();
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 2;        // account for the 2 bytes written by the fixup routine
}
|
||
|
||
/* Emit a 1 byte operand of kind fl described by *uev.
 *
 * FLconst emits the low byte of uev->Vuns.  FLblock emits the signed
 * displacement to the target block; if it does not fit in a signed byte
 * a message is written to stderr and err_exit() is called.  Any other
 * kind is an internal error.
 */
STATIC void do8bit(enum FL fl,union evc *uev)
{
    char val8;
    targ_ptrdiff_t dist;

    if (fl == FLconst)
    {
        val8 = uev->Vuns;
    }
    else if (fl == FLblock)
    {
        dist = uev->Vblock->Boffset - OFFSET() - 1;
        if ((signed char)dist != dist)
        {
#if MARS
            if (uev->Vblock->Bsrcpos.Slinnum)
                fprintf(stderr, "%s(%d): ", uev->Vblock->Bsrcpos.Sfilename, uev->Vblock->Bsrcpos.Slinnum);
#endif
            fprintf(stderr, "block displacement of %lld exceeds the maximum offset of -128 to 127.\n", (long long)dist);
            err_exit();
        }
        val8 = dist;
#ifdef DEBUG
        assert(uev->Vblock->Boffset > OFFSET() || val8 != 0x7F);
#endif
    }
    else
    {
#ifdef DEBUG
        fprintf(stderr,"fl = %d\n",fl);
#endif
        assert(0);
    }
    GEN(val8);
}
|
||
|
||
|
||
/**********************************
|
||
*/
|
||
|
||
#if HYDRATE
|
||
/* Hydrate every instruction in the list *pc: the link to the instruction
 * itself is passed through ph_hydrate(), then whatever pointer its first
 * and second operands hold (symbol, code, block, asm bytes, ...) is
 * hydrated according to the operand's FL kind.
 */
void code_hydrate(code **pc)
{
    code *c;
    unsigned char ins,rm;
    enum FL fl;

    assert(pc);
    while (*pc)
    {
        c = (code *) ph_hydrate(pc);

        // Attribute bits telling which operands this opcode has
        if (c->Iflags & CFvex)
            ins = vex_inssize(c);
        else if ((c->Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c->Iop >> 8) & 0xFF];
        else if ((c->Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c->Iop & 0xFF];
        else
            ins = inssize[c->Iop & 0xFF];

        // Pseudo-instructions carry their payload in IEV1 only
        switch (c->Iop)
        {
            case ESCAPE | ESClinnum:
                srcpos_hydrate(&c->IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_hydrate(&c->IEV1.Vtor);
                goto done;

            case ASM:
                ph_hydrate(&c->IEV1.as.bytes);
                goto done;

            default:
                break;
        }

        if (!(ins & M) ||
            ((rm = c->Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand */
        if (is32bitaddr(I32,c->Iflags))
        {
            if ((rm & 0xC0) == 0 &&
                !(((rm & 7) == 4 && (c->Isib & 7) == 5) || (rm & 7) == 5))
                goto do2;       /* if no first operand */
        }
        else
        {
            if ((rm & 0xC0) == 0 && (rm & 7) != 6)
                goto do2;       /* if no first operand */
        }

        fl = (enum FL) c->IFL1;
        switch (fl)
        {
            // Operand refers to a symbol
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
            case FLtmp:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c->IEVsym1);
                symbol_debug(c->IEVsym1);
                break;

            // Operand holds no pointer
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                (void) ph_hydrate(&c->IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                (void) ph_hydrate(&c->IEV1.Vblock);
                break;
#if SCPP
            case FLctor:
            case FLdtor:
                el_hydrate(&c->IEV1.Vtor);
                break;
#endif
            case FLasm:
                (void) ph_hydrate(&c->IEV1.as.bytes);
                break;

            default:
#ifdef DEBUG
                WRFL(fl);
#endif
                assert(0);
                break;
        }

    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        fl = (enum FL) c->IFL2;
        switch (fl)
        {
            // Operand refers to a symbol
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
            case FLtmp:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c->IEVsym2);
                symbol_debug(c->IEVsym2);
                break;

            // Operand holds no pointer
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                (void) ph_hydrate(&c->IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                (void) ph_hydrate(&c->IEV2.Vblock);
                break;

            default:
#ifdef DEBUG
                WRFL(fl);
#endif
                assert(0);
                break;
        }

    done:
        ;

        pc = &code_next(c);     // move on to the link of the next instruction
    }
}
|
||
#endif
|
||
|
||
/**********************************
|
||
*/
|
||
|
||
#if DEHYDRATE
|
||
/* Dehydrate every instruction in the list *pc: the link to the instruction
 * is passed through ph_dehydrate(), then whatever pointer its first and
 * second operands hold (symbol, code, block, asm bytes, ...) is dehydrated
 * according to the operand's FL kind.
 */
void code_dehydrate(code **pc)
{
    code *c;
    unsigned char ins,rm;
    enum FL fl;

    while ((c = *pc) != NULL)
    {
        ph_dehydrate(pc);

        // Attribute bits telling which operands this opcode has
        if (c->Iflags & CFvex)
            ins = vex_inssize(c);
        else if ((c->Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c->Iop >> 8) & 0xFF];
        else if ((c->Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c->Iop & 0xFF];
        else
            ins = inssize[c->Iop & 0xFF];

        // Pseudo-instructions carry their payload in IEV1 only
        switch (c->Iop)
        {
            case ESCAPE | ESClinnum:
                srcpos_dehydrate(&c->IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_dehydrate(&c->IEV1.Vtor);
                goto done;

            case ASM:
                ph_dehydrate(&c->IEV1.as.bytes);
                goto done;

            default:
                break;
        }

        if (!(ins & M) ||
            ((rm = c->Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand */
        if (is32bitaddr(I32,c->Iflags))
        {
            if ((rm & 0xC0) == 0 &&
                !(((rm & 7) == 4 && (c->Isib & 7) == 5) || (rm & 7) == 5))
                goto do2;       /* if no first operand */
        }
        else
        {
            if ((rm & 0xC0) == 0 && (rm & 7) != 6)
                goto do2;       /* if no first operand */
        }

        fl = (enum FL) c->IFL1;
        switch (fl)
        {
            // Operand refers to a symbol
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
            case FLtmp:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c->IEVsym1);
                break;

            // Operand holds no pointer
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c->IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c->IEV1.Vblock);
                break;
#if SCPP
            case FLctor:
            case FLdtor:
                el_dehydrate(&c->IEV1.Vtor);
                break;
#endif
            case FLasm:
                ph_dehydrate(&c->IEV1.as.bytes);
                break;

            default:
#ifdef DEBUG
                WRFL(fl);
#endif
                assert(0);
                break;
        }

    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        fl = (enum FL) c->IFL2;
        switch (fl)
        {
            // Operand refers to a symbol
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
            case FLtmp:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c->IEVsym2);
                break;

            // Operand holds no pointer
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c->IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c->IEV2.Vblock);
                break;

            default:
#ifdef DEBUG
                WRFL(fl);
#endif
                assert(0);
                break;
        }

    done:
        ;
        pc = &code_next(c);     // move on to the link of the next instruction
    }
}
|
||
#endif
|
||
|
||
/***************************
|
||
* Debug code to dump code structure.
|
||
*/
|
||
|
||
#if DEBUG
|
||
|
||
/* Print every instruction of the list starting at c, one per line. */
void WRcodlst(code *c)
{
    while (c)
    {
        c->print();
        c = code_next(c);
    }
}
|
||
|
||
void code::print()
|
||
{
|
||
unsigned char ins;
|
||
unsigned char rexb;
|
||
code *c = this;
|
||
|
||
if (c == CNIL)
|
||
{ printf("code 0\n");
|
||
return;
|
||
}
|
||
|
||
unsigned op = c->Iop;
|
||
if (c->Iflags & CFvex)
|
||
ins = vex_inssize(c);
|
||
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
|
||
ins = inssize2[(op >> 8) & 0xFF];
|
||
else if ((c->Iop & 0xFF00) == 0x0F00)
|
||
ins = inssize2[op & 0xFF];
|
||
else
|
||
ins = inssize[op & 0xFF];
|
||
|
||
printf("code %p: nxt=%p ",c,code_next(c));
|
||
|
||
if (c->Iflags & CFvex)
|
||
{
|
||
if (c->Iflags & CFvex3)
|
||
{ printf("vex=0xC4");
|
||
printf(" 0x%02X", VEX3_B1(c->Ivex));
|
||
printf(" 0x%02X", VEX3_B2(c->Ivex));
|
||
rexb =
|
||
( c->Ivex.w ? REX_W : 0) |
|
||
(!c->Ivex.r ? REX_R : 0) |
|
||
(!c->Ivex.x ? REX_X : 0) |
|
||
(!c->Ivex.b ? REX_B : 0);
|
||
}
|
||
else
|
||
{ printf("vex=0xC5");
|
||
printf(" 0x%02X", VEX2_B1(c->Ivex));
|
||
rexb = !c->Ivex.r ? REX_R : 0;
|
||
}
|
||
printf(" ");
|
||
}
|
||
else
|
||
rexb = c->Irex;
|
||
|
||
if (rexb)
|
||
{ printf("rex=0x%02X ", c->Irex);
|
||
if (rexb & REX_W)
|
||
printf("W");
|
||
if (rexb & REX_R)
|
||
printf("R");
|
||
if (rexb & REX_X)
|
||
printf("X");
|
||
if (rexb & REX_B)
|
||
printf("B");
|
||
printf(" ");
|
||
}
|
||
printf("op=0x%02X",op);
|
||
|
||
if ((op & 0xFF) == ESCAPE)
|
||
{ if ((op & 0xFF00) == ESClinnum)
|
||
{ printf(" linnum = %d\n",c->IEV1.Vsrcpos.Slinnum);
|
||
return;
|
||
}
|
||
printf(" ESCAPE %d",c->Iop >> 8);
|
||
}
|
||
if (c->Iflags)
|
||
printf(" flg=%x",c->Iflags);
|
||
if (ins & M)
|
||
{ unsigned rm = c->Irm;
|
||
printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
|
||
if (!I16 && issib(rm))
|
||
{ unsigned char sib = c->Isib;
|
||
printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
|
||
}
|
||
if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
|
||
{
|
||
switch (c->IFL1)
|
||
{
|
||
case FLconst:
|
||
case FLoffset:
|
||
printf(" int = %4d",c->IEV1.Vuns);
|
||
break;
|
||
case FLblock:
|
||
printf(" block = %p",c->IEV1.Vblock);
|
||
break;
|
||
case FLswitch:
|
||
case FLblockoff:
|
||
case FLlocalsize:
|
||
case FLframehandler:
|
||
case 0:
|
||
break;
|
||
case FLdatseg:
|
||
printf(" %d.%llx",c->IEVseg1,(unsigned long long)c->IEVpointer1);
|
||
break;
|
||
case FLauto:
|
||
case FLreg:
|
||
case FLdata:
|
||
case FLudata:
|
||
case FLpara:
|
||
case FLtmp:
|
||
case FLbprel:
|
||
case FLtlsdata:
|
||
printf(" sym='%s'",c->IEVsym1->Sident);
|
||
break;
|
||
case FLextern:
|
||
printf(" FLextern offset = %4d",(int)c->IEVoffset1);
|
||
break;
|
||
default:
|
||
WRFL((enum FL)c->IFL1);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (ins & T)
|
||
{ printf(" "); WRFL((enum FL)c->IFL2);
|
||
switch (c->IFL2)
|
||
{
|
||
case FLconst:
|
||
printf(" int = %4d",c->IEV2.Vuns);
|
||
break;
|
||
case FLblock:
|
||
printf(" block = %p",c->IEV2.Vblock);
|
||
break;
|
||
case FLswitch:
|
||
case FLblockoff:
|
||
case 0:
|
||
case FLlocalsize:
|
||
case FLframehandler:
|
||
break;
|
||
case FLdatseg:
|
||
printf(" %d.%llx",c->IEVseg2,(unsigned long long)c->IEVpointer2);
|
||
break;
|
||
case FLauto:
|
||
case FLreg:
|
||
case FLpara:
|
||
case FLtmp:
|
||
case FLbprel:
|
||
case FLfunc:
|
||
case FLdata:
|
||
case FLudata:
|
||
case FLtlsdata:
|
||
printf(" sym='%s'",c->IEVsym2->Sident);
|
||
break;
|
||
case FLcode:
|
||
printf(" code = %p",c->IEV2.Vcode);
|
||
break;
|
||
default:
|
||
WRFL((enum FL)c->IFL2);
|
||
break;
|
||
}
|
||
}
|
||
printf("\n");
|
||
}
|
||
#endif
|
||
|
||
#endif // !SPP
|