ldc/gen/abi-x86-64.cpp

/* TargetABI implementation for x86-64.
 * Written for LDC by Frits van Bommel in 2009.
 *
 * extern(D) follows no particular external ABI, but tries to be smart about
 * passing structs and returning them. It should probably be reviewed if the
 * way LLVM implements fastcc on this platform ever changes.
 * (Specifically, the number of return registers of various types is hardcoded)
 *
 *
 * extern(C) implements the C calling convention for x86-64, as found in
 * http://www.x86-64.org/documentation/abi-0.99.pdf
 *
 * Note:
 *   Where a discrepancy was found between llvm-gcc and the ABI documentation,
 *   llvm-gcc behavior was used for compatibility (after it was verified that
 *   regular gcc has the same behavior).
 *
 * LLVM gets it right for most types, but complex numbers and structs need some
 * help. To make sure it gets those right we essentially bitcast small structs
 * to a type to which LLVM assigns the appropriate registers, and pass that
 * instead. Structs that are required to be passed in memory are explicitly
 * marked with the ByVal attribute to ensure no part of them ends up in
 * registers when only a subset of the desired registers are available.
 *
 * We don't perform the same transformation for D-specific types that contain
 * multiple parts, such as dynamic arrays and delegates. They're passed as if
 * the parts were passed as separate parameters. This helps make things like
 * printf("%.*s", o.toString()) work as expected; if we didn't do this that
 * wouldn't work if there were 4 other integer/pointer arguments before the
 * toString() call because the string got bumped to memory with one integer
 * register still free. Keeping it untransformed puts the length in a register
 * and the pointer in memory, as printf expects it.
 */

#include "mtype.h"
#include "declaration.h"
#include "aggregate.h"

#include "gen/irstate.h"
#include "gen/llvm.h"
#include "gen/tollvm.h"
#include "gen/logger.h"
#include "gen/dvalue.h"
#include "gen/llvmhelpers.h"
#include "gen/abi.h"
#include "gen/abi-x86-64.h"
#include "gen/abi-generic.h"
#include "ir/irfunction.h"

#include <cassert>
#include <map>
#include <string>
#include <utility>

// Implementation details for extern(C)
namespace {
    /**
     * Identifies types that look like structs to C but must nevertheless be
     * handed to C untransformed, i.e. as separate arguments.
     *
     * (A dynamic array, for instance, goes out as separate length and ptr.
     * That is less work and keeps printf("%.*s", o.toString()) working.)
     *
     * Returns true for dynamic arrays, associative arrays and delegates,
     * false for every other type. Only 't->ty' is inspected; the caller is
     * responsible for passing the base type if that is what it cares about.
     */
    inline bool keepUnchanged(Type* t) {
        const bool isDynamicArray = (t->ty == Tarray);
        const bool isAssocArray   = (t->ty == Taarray);
        const bool isDelegate     = (t->ty == Tdelegate);
        return isDynamicArray || isAssocArray || isDelegate;
    }

    /// Classes that can be assigned to one 8-byte half of an argument.
    enum ArgClass {
        Integer, Sse, SseUp, X87, X87Up, ComplexX87, NoClass, Memory
    };

    /// Accumulated classification of a type: one ArgClass per 8-byte half,
    /// plus a flag that is set once the whole type has to live in memory.
    struct Classification {
        bool isMemory;          ///< true once any field forced the Memory class
        ArgClass classes[2];    ///< [0]: bytes 0..7, [1]: everything from byte 8 on

        /// Starts out as "nothing seen yet": both halves NoClass, not in memory.
        Classification() : isMemory(false) {
            for (int half = 0; half < 2; ++half)
                classes[half] = NoClass;
        }

        /// Merges a field of class 'cl' located at byte 'offset' into the
        /// half it falls in. Once the classification is Memory, further
        /// fields are ignored.
        void addField(unsigned offset, ArgClass cl) {
            if (!isMemory) {
                // No need to check whether the field straddles the 8-byte
                // boundary: unaligned fields never get here, and everything
                // big enough to straddle (cdoubles, reals, structs, arrays)
                // is special-cased in classifyType().
                const int half = (offset >= 8) ? 1 : 0;

                const ArgClass combined = merge(classes[half], cl);
                if (combined == classes[half])
                    return;

                classes[half] = combined;
                if (combined == Memory) {
                    // Memory is contagious: it takes the other half along.
                    classes[1 - half] = Memory;
                    isMemory = true;
                }
            }
        }

    private:
        /// True for the three x87-related classes.
        static bool isX87Family(ArgClass c) {
            return c == X87 || c == X87Up || c == ComplexX87;
        }

        /// Combines the class accumulated so far with the class of a new
        /// field sharing the same half. Rules are applied in priority order.
        ArgClass merge(ArgClass accum, ArgClass cl) {
            if (accum == cl || cl == NoClass)
                return accum;               // nothing new
            if (accum == NoClass)
                return cl;                  // first real field in this half
            if (accum == Memory || cl == Memory)
                return Memory;
            if (accum == Integer || cl == Integer)
                return Integer;
            if (isX87Family(accum) || isX87Family(cl))
                return Memory;
            return Sse;
        }
    };

    /**
     * Classifies 'ty', located 'offset' bytes into the value being
     * classified, and merges the resulting class(es) into 'accum'.
     *
     * Static arrays and structs are classified by recursing into their
     * elements / fields. The order of the checks below is significant:
     * the "> 16 bytes or unaligned" test must come after the creal case
     * but before the static array and struct cases.
     */
    void classifyType(Classification& accum, Type* ty, d_uns64 offset) {
        if (Logger::enabled())
            Logger::cout() << "Classifying " << ty->toChars() << " @ " << offset << '\n';

        ty = ty->toBasetype();

        if (ty->isintegral() || ty->ty == Tpointer) {
            accum.addField(offset, Integer);
        } else if (ty->ty == Tfloat80 || ty->ty == Timaginary80) {
            // An 80-bit real occupies both halves.
            accum.addField(offset, X87);
            accum.addField(offset+8, X87Up);
        } else if (ty->ty == Tcomplex80) {
            accum.addField(offset, ComplexX87);
            // make sure other half knows about it too:
            accum.addField(offset+16, ComplexX87);
        } else if (ty->ty == Tcomplex64) {
            // Two doubles, one per half.
            accum.addField(offset, Sse);
            accum.addField(offset+8, Sse);
        } else if (ty->ty == Tcomplex32) {
            // Two floats, 4 bytes apart.
            accum.addField(offset, Sse);
            accum.addField(offset+4, Sse);
        } else if (ty->isfloating()) {
            accum.addField(offset, Sse);
        } else if (ty->size() > 16 || hasUnalignedFields(ty)) {
            // This isn't creal, yet is > 16 bytes, so pass in memory.
            // Must be after creal case but before arrays and structs,
            // the other types that can get bigger than 16 bytes
            accum.addField(offset, Memory);
        } else if (ty->ty == Tsarray) {
            Type* eltType = ty->nextOf();
            d_uns64 eltsize = eltType->size();
            // Zero-sized elements contribute nothing (and would divide by zero).
            if (eltsize > 0) {
                d_uns64 dim = ty->size() / eltsize;
                assert(dim <= 16
                        && "Array of non-empty type <= 16 bytes but > 16 elements?");
                for (d_uns64 i = 0; i < dim; i++) {
                    classifyType(accum, eltType, offset);
                    offset += eltsize;
                }
            }
        } else if (ty->ty == Tstruct) {
            Array* fields = &((TypeStruct*) ty)->sym->fields;
            for (size_t i = 0; i < fields->dim; i++) {
                VarDeclaration* field = (VarDeclaration*) fields->data[i];
                classifyType(accum, field->type, offset + field->offset);
            }
        } else {
            if (Logger::enabled())
                Logger::cout() << "x86-64 ABI: Implicitly handled type: "
                               << ty->toChars() << '\n';
            // arrays, delegates, etc. (pointer-sized fields, <= 16 bytes)
            // The offset test is parenthesized so that the message string
            // attaches to the whole condition rather than only to its
            // second alternative.
            assert((offset == 0 || offset == 8)
                    && "must be aligned and doesn't fit otherwise");
            assert(ty->size() % 8 == 0 && "Not a multiple of pointer size?");

            accum.addField(offset, Integer);
            if (ty->size() > 8)
                accum.addField(offset+8, Integer);
        }
    }

    /// Returns the classification of 'ty' as a whole (i.e. at offset 0).
    /// Results are memoized per Type pointer in a function-local static map,
    /// so each distinct Type object is run through classifyType() only once.
    Classification classify(Type* ty) {
        typedef std::map<Type*, Classification> ClassMap;
        static ClassMap cache;

        ClassMap::iterator entry = cache.find(ty);
        if (entry == cache.end()) {
            // First time we see this type: compute and remember.
            Classification fresh;
            classifyType(fresh, ty, 0);
            entry = cache.insert(ClassMap::value_type(ty, fresh)).first;
        }
        return entry->second;
    }

    /// Returns the type to pass as, or null if no transformation is needed.
    ///
    /// Only cfloat and struct types are ever rewritten; everything else
    /// (including the types accepted by keepUnchanged()) yields null.
    /// The result is built from the classification of the two 8-byte halves
    /// of the type: a single scalar type if only the first half is used,
    /// x86_fp80 for a lone real/ireal, otherwise a two-element struct type.
    ///
    /// Types classified as Memory must not be passed here (asserted).
    LLType* getAbiType(Type* ty) {
        ty = ty->toBasetype();

        // First, check if there's any need of a transformation:

        if (keepUnchanged(ty))
            return 0;

        if (ty->ty != Tcomplex32 && ty->ty != Tstruct)
            return 0; // Nothing to do.

        Classification cl = classify(ty);
        assert(!cl.isMemory);

        if (cl.classes[0] == NoClass) {
            assert(cl.classes[1] == NoClass && "Non-empty struct with empty first half?");
            return 0; // Empty structs should also be handled correctly by LLVM
        }

        // Okay, we may need to transform. Figure out a canonical type:

        std::vector<const LLType*> parts;

        unsigned size = ty->size();

        // First half (bytes 0..7).
        switch (cl.classes[0]) {
            case Integer: {
                // Use an integer exactly as wide as the data in this half.
                unsigned bits = (size >= 8 ? 64 : (size * 8));
                parts.push_back(LLIntegerType::get(gIR->context(), bits));
                break;
            }

            case Sse:
                parts.push_back(size <= 4 ? LLType::getFloatTy(gIR->context()) : LLType::getDoubleTy(gIR->context()));
                break;

            case X87:
                assert(cl.classes[1] == X87Up && "Upper half of real not X87Up?");
                /// The type only contains a single real/ireal field,
                /// so just use that type.
                return const_cast<LLType*>(LLType::getX86_FP80Ty(gIR->context()));

            default:
                assert(0 && "Unanticipated argument class");
                // With assertions compiled out, 'parts' would still be empty
                // here and the code below would read parts[0] or build an
                // empty struct type. Leave the type untransformed instead.
                return 0;
        }

        // Second half (bytes 8..15).
        switch(cl.classes[1]) {
            case NoClass:
                assert(parts.size() == 1);
                // No need to use a single-element struct type.
                // Just use the element type instead.
                return const_cast<LLType*>(parts[0]);

            case Integer: {
                assert(size > 8);
                unsigned bits = (size - 8) * 8;
                parts.push_back(LLIntegerType::get(gIR->context(), bits));
                break;
            }
            case Sse:
                parts.push_back(size <= 12 ? LLType::getFloatTy(gIR->context()) : LLType::getDoubleTy(gIR->context()));
                break;

            case X87Up:
                if(cl.classes[0] == X87) {
                    // This won't happen: it was short-circuited while
                    // processing the first half.
                } else {
                    // I can't find this anywhere in the ABI documentation,
                    // but this is what gcc does (both regular and llvm-gcc).
                    // (This triggers for types like union { real r; byte b; })
                    parts.push_back(LLType::getDoubleTy(gIR->context()));
                }
                break;

            default:
                assert(0 && "Unanticipated argument class for second half");
        }
        return LLStructType::get(gIR->context(), parts);
    }
}


// Implementation details for extern(D)
namespace x86_64_D_cc {
    /// A count of registers of each kind: integer, SSE and x87.
    /// Used both for "registers needed" and "registers still left".
    struct DRegCount {
        unsigned ints;  ///< integer registers
        unsigned sse;   ///< SSE registers
        unsigned x87;   ///< x87 registers

        /// Sets the three counters, in the order (integer, SSE, x87).
        DRegCount(unsigned intCount, unsigned sseCount, unsigned x87Count)
        : ints(intCount)
        , sse(sseCount)
        , x87(x87Count)
        {}
    };

    // Computes how many registers of each kind a simple type occupies.
    // "Simple" means: not a struct and not a static array. Passing one of
    // those trips an assertion; with assertions disabled the result is a
    // huge count of every kind, so the value always gets bumped to memory.
    // Only 't->ty' (and, for the integer-like case, 't->size()') is consulted.
    DRegCount regsNeededForSimpleType(Type* t) {
        switch(t->ty) {
            // Floats, doubles and their imaginary variants: one SSE register.
            case Tfloat32:
            case Tfloat64:
            case Timaginary32:
            case Timaginary64:
                return DRegCount(0, 1, 0);

            // cfloat and cdouble: two SSE registers.
            case Tcomplex32:
            case Tcomplex64:
                return DRegCount(0, 2, 0);

            // Reals and ireals: one x87 register.
            case Tfloat80:
            case Timaginary80:
                return DRegCount(0, 0, 1);

            // creal: two x87 registers.
            case Tcomplex80:
                return DRegCount(0, 0, 2);

            case Tstruct:
            case Tsarray: {
                assert(0 && "Not a simple type!");
                const unsigned huge = (unsigned)-1;
                return DRegCount(huge, huge, huge);
            }

            // Everything else takes one or two integer registers,
            // depending on its size.
            default: {
                int needed = (t->size() + 7) / 8;
                assert(needed <= 2);
                return DRegCount(needed, 0, 0);
            }
        }
    }

    // Returns true if it's possible (and a good idea) to pass the struct in the
    // specified number of registers.
    // (May return false if it's a bad idea to pass the type in registers for
    // reasons other than it not fitting)
    // Note that if true is returned, 'left' is also modified to contain the
    // number of registers left. This property is used in the recursive case.
    // If false is returned, 'left' is garbage.
    //
    // Fields are examined through their base type, so that a typedef'd or
    // enum-wrapped struct, static array or floating-point field is treated
    // exactly like the underlying type instead of falling into the
    // "integer-like" default of regsNeededForSimpleType().
    bool shouldPassStructInRegs(TypeStruct* t, DRegCount& left) {
        // If it has unaligned fields, there's probably a reason for it,
        // so keep it in memory.
        if (hasUnalignedFields(t))
            return false;

        Array* fields = &t->sym->fields;

        // Structs without fields are never passed in registers.
        if (fields->dim == 0)
            return false;

        d_uns64 nextbyte = 0;
        for (d_uns64 i = 0; i < fields->dim; i++) {
            VarDeclaration* field = (VarDeclaration*) fields->data[i];

            // This depends on ascending order of field offsets in structs
            // without overlapping fields.
            if (field->offset < nextbyte) {
                // Don't return unions (or structs containing them) in registers.
                return false;
            }
            nextbyte = field->offset + field->type->size();

            // Look through typedefs/enums, as the rest of this file does.
            Type* fieldType = field->type->toBasetype();

            switch (fieldType->ty) {
                case Tstruct:
                    // Nested struct: recurse, consuming from the same budget.
                    if (!shouldPassStructInRegs((TypeStruct*) fieldType, left))
                        return false;
                    break;

                case Tsarray:
                    // Don't return static arrays in registers
                    // (indexing registers doesn't work well)
                    return false;

                default: {
                    DRegCount needed = regsNeededForSimpleType(fieldType);
                    if (needed.ints > left.ints || needed.sse > left.sse || needed.x87 > left.x87)
                        return false;
                    left.ints -= needed.ints;
                    left.sse -= needed.sse;
                    left.x87 -= needed.x87;
                    break;
                }
            }
        }
        return true;
    }

    // Decides whether a struct can be returned in registers under the
    // x86-64 fastcc calling convention.
    bool retStructInRegs(TypeStruct* st) {
        // 'fastcc' allows returns in up to two registers of each kind.
        DRegCount available(2, 2, 2);
        const bool fits = shouldPassStructInRegs(st, available);
        return fits;
    }

    // Heuristic: should a struct parameter be passed directly, or be bumped
    // to memory?
    //
    // A struct is passed directly if it fits in a reasonable number of
    // registers (two of each kind). That does not necessarily mean it ends
    // up in registers: x87 registers, for example, are never actually used
    // for parameters.
    //
    // Using the real register counts instead doesn't work well: since the
    // register count can differ depending on backend options, there's no way
    // to be exact anyway. For reference, those counts would be
    //     regular fastcc:      6 int, 8 sse, 0 x87
    //     fastcc + tailcall:   5 int, 8 sse, 0 x87
    bool passStructTypeDirectly(TypeStruct* st) {
        DRegCount budget(2, 2, 2);
        const bool direct = shouldPassStructInRegs(st, budget);
        return direct;
    }
}

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////


/// Just store to memory and it's readable as the other type.
struct X86_64_C_struct_rewrite : ABIRewrite {
    // Get struct from ABI-mangled representation
    LLValue* get(Type* dty, DValue* v)
    {
        LLValue* lval;
        if (v->isLVal()) {
            lval = v->getLVal();
        } else {
            // No memory location, create one.
            LLValue* rval = v->getRVal();
            lval = DtoRawAlloca(rval->getType(), 0);
            DtoStore(rval, lval);
        }

        const LLType* pTy = getPtrToType(DtoType(dty));
        return DtoLoad(DtoBitCast(lval, pTy), "get-result");
    }

    // Get struct from ABI-mangled representation, and store in the provided location.
    void getL(Type* dty, DValue* v, llvm::Value* lval) {
        LLValue* rval = v->getRVal();
        const LLType* pTy = getPtrToType(rval->getType());
        DtoStore(rval, DtoBitCast(lval, pTy));
    }

    // Turn a struct into an ABI-mangled representation
    LLValue* put(Type* dty, DValue* v)
    {
        LLValue* lval;
        if (v->isLVal()) {
            lval = v->getLVal();
        } else {
            // No memory location, create one.
            LLValue* rval = v->getRVal();
            lval = DtoRawAlloca(rval->getType(), 0);
            DtoStore(rval, lval);
        }

        LLType* abiTy = getAbiType(dty);
        assert(abiTy && "Why are we rewriting a non-rewritten type?");

        const LLType* pTy = getPtrToType(abiTy);
        return DtoLoad(DtoBitCast(lval, pTy), "put-result");
    }

    /// should return the transformed type for this rewrite
    const LLType* type(Type* dty, const LLType* t)
    {
        return getAbiType(dty);
    }
};


/// A pair of register counts for the non-D calling convention: integer
/// registers and SSE registers. Used both for the registers still available
/// while a function type is being built and for the registers a single
/// argument wants. Kept as a plain aggregate so it can be brace-initialized.
struct RegCount {
    unsigned char int_regs, sse_regs;
};


/// TargetABI implementation for x86-64.
///
/// Keeps one FuncTypeData entry per function type currently under
/// construction; newFunctionType() pushes an entry and
/// doneWithFunctionType() pops it again.
struct X86_64TargetABI : TargetABI {
    X86_64_C_struct_rewrite struct_rewrite; ///< rewrite installed by fixup()
    RemoveStructPadding remove_padding;     ///< rewrite installed by fixup_D()

    /// Starts bookkeeping for a new function type with tf's linkage.
    void newFunctionType(TypeFunction* tf) {
        FuncTypeData fresh(tf->linkage);
        funcTypeStack.push_back(fresh);
    }

    /// Whether the return value of 'tf' is returned through a hidden argument.
    bool returnInArg(TypeFunction* tf);

    /// Whether a parameter of type 't' is to be passed by value in memory.
    bool passByVal(Type* t);

    /// Applies the ABI rewrites to the return type and arguments of 'tf'.
    void rewriteFunctionType(TypeFunction* tf);

    /// Ends bookkeeping for the function type started last.
    void doneWithFunctionType() {
        funcTypeStack.pop_back();
    }

private:
    /// Per-function-type bookkeeping.
    struct FuncTypeData {
        LINK linkage;       // Linkage of the function type currently under construction
        RegCount state;     // bookkeeping for extern(C) parameter registers

        /// Begins with 6 integer and 8 SSE registers available.
        FuncTypeData(LINK linkage_)
        : linkage(linkage_)
        {
            state.int_regs = 6;
            state.sse_regs = 8;
        }
    };
    std::vector<FuncTypeData> funcTypeStack;

    /// Linkage of the innermost function type under construction.
    LINK linkage() {
        assert(!funcTypeStack.empty());
        return funcTypeStack.back().linkage;
    }

    /// Register bookkeeping of the innermost function type under construction.
    RegCount& state() {
        assert(!funcTypeStack.empty());
        return funcTypeStack.back().state;
    }

    void fixup_D(IrFuncTyArg& arg);
    void fixup(IrFuncTyArg& arg);
};


// Public entry point for abi.cpp: hands out a newly heap-allocated
// x86-64 TargetABI implementation.
TargetABI* getX86_64TargetABI() {
    TargetABI* abi = new X86_64TargetABI;
    return abi;
}


// Decides whether the return value of 'tf' goes through a hidden (sret)
// argument. For non-D linkage, answering "yes" also consumes one integer
// register from the bookkeeping of the current function type.
bool X86_64TargetABI::returnInArg(TypeFunction* tf) {
    assert(linkage() == tf->linkage);
    Type* rt = tf->next->toBasetype();

    if (tf->linkage != LINKd) {
        // void and the "keep unchanged" types never use sret.
        if (rt == Type::tvoid || keepUnchanged(rt))
            return false;

        Classification cl = classify(rt);
        if (!cl.isMemory)
            return false;

        assert(state().int_regs > 0
            && "No int registers available when determining sret-ness?");
        // An sret parameter takes an integer register.
        state().int_regs--;
        return true;
    }

    // extern(D) from here on.
#if DMDV2
    if (tf->isref)
        return false;
#endif

    // All non-structs can be returned in registers.
    // FIXME: Update calling convention for static array returns
    if (rt->ty != Tstruct)
        return false;

    // Try to figure out whether the struct fits in return registers
    // and whether it's a good idea to put it there.
    return !x86_64_D_cc::retStructInRegs((TypeStruct*) rt);
}

// Decides whether a parameter of type 't' must be passed by value in memory.
//
// extern(D): only structs can be, as decided by
// x86_64_D_cc::passStructTypeDirectly().
//
// Otherwise (C calling convention): types classified as Memory are, and so
// are complex and struct types whose registers are not all available.
// As a side effect, the registers used by the argument are deducted from the
// bookkeeping of the function type currently under construction.
bool X86_64TargetABI::passByVal(Type* t) {
    t = t->toBasetype();
    if (linkage() == LINKd) {
        if (t->ty != Tstruct)
            return false;

        // Try to be smart about which structs are passed in memory.
        return !x86_64_D_cc::passStructTypeDirectly((TypeStruct*) t);
    } else {
        // This implements the C calling convention for x86-64.
        // It might not be correct for other calling conventions.
        Classification cl = classify(t);
        if (cl.isMemory)
            return true;

        // Figure out how many registers we want for this arg:
        RegCount wanted = { 0, 0 };
        for (int i = 0 ; i < 2; i++) {
            if (cl.classes[i] == Integer)
                wanted.int_regs++;
            else if (cl.classes[i] == Sse)
                wanted.sse_regs++;
        }

        // See if they're available:
        RegCount& state = this->state();
        if (wanted.int_regs <= state.int_regs && wanted.sse_regs <= state.sse_regs) {
            state.int_regs -= wanted.int_regs;
            state.sse_regs -= wanted.sse_regs;
        } else {
            if (keepUnchanged(t)) {
                // Not enough registers available, but this is passed as if it's
                // multiple arguments. Just use the registers there are,
                // automatically spilling the rest to memory.
                if (wanted.int_regs > state.int_regs)
                    state.int_regs = 0;
                else
                    state.int_regs -= wanted.int_regs;

                if (wanted.sse_regs > state.sse_regs)
                    state.sse_regs = 0;
                else
                    state.sse_regs -= wanted.sse_regs;
            } else if (t->iscomplex() || t->ty == Tstruct) {
                // Spill entirely to memory, even if some of the registers are
                // available.

                // FIXME: Don't do this if *none* of the wanted registers are available,
                //        (i.e. only when absolutely necessary for abi-compliance)
                //        so it gets alloca'd by the callee and -scalarrepl can
                //        more easily break it up?
                // Note: this won't be necessary if the following LLVM bug gets fixed:
                //       http://llvm.org/bugs/show_bug.cgi?id=3741
                return true;
            } else {
                // The alternatives are parenthesized so that the message
                // string attaches to the whole condition, and the type is
                // identified by its 'ty' like everywhere else in this file.
                assert((t->ty == Tfloat80 || t->ty == Timaginary80 || t->size() <= 8)
                    && "What other big types are there?"); // other than static arrays...
                // In any case, they shouldn't be represented as structs in LLVM:
                assert(!isaStruct(DtoType(t)));
            }
        }
        // Everything else that's passed in memory is handled by LLVM.
        return false;
    }
}

// Helper for rewriteFunctionType (extern(D) path).
// Receives structs that are passed or returned in registers and, where the
// unpadded struct type differs from the current LLVM type, switches the
// argument over to it and installs the padding-removing rewrite.
void X86_64TargetABI::fixup_D(IrFuncTyArg& arg) {
    Type* baseType = arg.type->toBasetype();
    assert(baseType->ty == Tstruct);

    LLType* unpadded = DtoUnpaddedStructType(baseType);

    // Nothing to do if there is no unpadded type or it is already in use.
    if (!unpadded || unpadded == arg.ltype)
        return;

    arg.ltype = unpadded;
    arg.rewrite = &remove_padding;
}

// Helper function for rewriteFunctionType.
// Return type and parameters are passed here (unless they're already in memory)
// to get the rewrite applied (if necessary).
// If getAbiType() yields a type that differs from the argument's current LLVM
// type, that type is installed together with the struct rewrite.
void X86_64TargetABI::fixup(IrFuncTyArg& arg) {
    LLType* abiTy = getAbiType(arg.type);

    if (abiTy && abiTy != arg.ltype) {
        // getAbiType() makes its decision on the base type's 'ty', so the
        // sanity check has to look at the same thing. Checking arg.type
        // itself would fire spuriously for a typedef'd struct or for a
        // cfloat that is not the Type::tcomplex32 object.
        assert(arg.type->toBasetype()->ty == Tcomplex32
            || arg.type->toBasetype()->ty == Tstruct);
        arg.ltype = abiTy;
        arg.rewrite = &struct_rewrite;
    }
}

// Applies the ABI rewrites to the return type and the arguments of 'tf'.
//
// extern(D): struct returns (when not returned via sret and not by
// reference) and struct arguments that are not passed by reference go
// through fixup_D().
// Any other linkage: a non-void return (when not returned via sret) and all
// arguments that are not passed by reference go through fixup().
void X86_64TargetABI::rewriteFunctionType(TypeFunction* tf) {
    IrFuncTy& fty = tf->fty;

    if (tf->linkage == LINKd) {
        if (!fty.arg_sret) {
            Type* rt = fty.ret->type->toBasetype();
            if (rt->ty == Tstruct && !fty.ret->byref)  {
                Logger::println("x86-64 D ABI: Transforming return type");
                fixup_D(*fty.ret);
            }
        }

#if DMDV1
        if (fty.arg_this) {
            fty.arg_this->attrs |= llvm::Attribute::Nest;
        }
        if (fty.arg_nest) {
            fty.arg_nest->attrs |= llvm::Attribute::Nest;
        }
#endif

        Logger::println("x86-64 D ABI: Transforming arguments");
        LOG_SCOPE;

        for (IrFuncTy::ArgIter I = fty.args.begin(), E = fty.args.end(); I != E; ++I) {
            IrFuncTyArg& arg = **I;

            if (Logger::enabled())
                Logger::cout() << "Arg: " << arg.type->toChars() << '\n';

            // Arguments that are in memory are of no interest to us.
            if (arg.byref)
                continue;

            Type* ty = arg.type->toBasetype();
            if (ty->ty == Tstruct)
                fixup_D(arg);

            if (Logger::enabled())
                Logger::cout() << "New arg type: " << *arg.ltype << '\n';
        }
    } else {
        // TODO: See if this is correct for more than just extern(C).

        if (!fty.arg_sret) {
            Logger::println("x86-64 ABI: Transforming return type");
            Type* rt = fty.ret->type->toBasetype();
            if (rt != Type::tvoid)
                fixup(*fty.ret);
        }


        Logger::println("x86-64 ABI: Transforming arguments");
        LOG_SCOPE;

        for (IrFuncTy::ArgIter I = fty.args.begin(), E = fty.args.end(); I != E; ++I) {
            IrFuncTyArg& arg = **I;

            if (Logger::enabled())
                Logger::cout() << "Arg: " << arg.type->toChars() << '\n';

            // Arguments that are in memory are of no interest to us.
            if (arg.byref)
                continue;

            // fixup() looks at the base type itself, so no local copy of it
            // is needed here.
            fixup(arg);
            if (Logger::enabled())
                Logger::cout() << "New arg type: " << *arg.ltype << '\n';
        }
    }
}