diff --git a/gen/abi-x86-64.cpp b/gen/abi-x86-64.cpp
new file mode 100644
index 00000000..dad7b2e4
--- /dev/null
+++ b/gen/abi-x86-64.cpp
@@ -0,0 +1,680 @@
+/* TargetABI implementation for x86-64.
+ * Written for LDC by Frits van Bommel in 2009.
+ * 
+ * extern(D) follows no particular external ABI, but tries to be smart about
+ * passing structs and returning them. It should probably be reviewed if the
+ * way LLVM implements fastcc on this platform ever changes.
+ * (Specifically, the number of return registers of various types is hardcoded)
+ * 
+ * 
+ * extern(C) implements the C calling convention for x86-64, as found in
+ * http://www.x86-64.org/documentation/abi-0.99.pdf
+ * 
+ * Note:
+ *   Where a discrepancy was found between llvm-gcc and the ABI documentation,
+ *   llvm-gcc behavior was used for compatibility (after it was verified that
+ *   regular gcc has the same behavior).
+ * 
+ * LLVM gets it right for most types, but complex numbers and structs need some
+ * help. To make sure it gets those right we essentially bitcast small structs
+ * to a type to which LLVM assigns the appropriate registers, and pass that
+ * instead. Structs that are required to be passed in memory are explicitly
+ * marked with the ByVal attribute to ensure no part of them ends up in
+ * registers when only a subset of the desired registers are available.
+ * 
+ * We don't perform the same transformation for D-specific types that contain
+ * multiple parts, such as dynamic arrays and delegates. They're passed as if
+ * the parts were passed as separate parameters. This helps make things like
+ * printf("%.*s", o.toString()) work as expected; if we didn't do this that
+ * wouldn't work if there were 4 other integer/pointer arguments before the
+ * toString() call because the string got bumped to memory with one integer
+ * register still free. Keeping it untransformed puts the length in a register
+ * and the pointer in memory, as printf expects it.
+ */
+
+#include "dmd/mtype.h"
+#include "dmd/declaration.h"
+#include "dmd/aggregate.h"
+
+#include "gen/llvm.h"
+#include "gen/tollvm.h"
+#include "gen/logger.h"
+#include "gen/dvalue.h"
+#include "gen/llvmhelpers.h"
+#include "gen/abi.h"
+#include "gen/abi-x86-64.h"
+#include "ir/irfunction.h"
+
+#include <cassert>
+#include <map>
+#include <string>
+#include <utility>
+
+// Implementation details for extern(C)
+namespace {
+    /**
+     * This function helps filter out things that look like structs to C,
+     * but should be passed to C in separate arguments anyway.
+     * 
+     * (e.g. dynamic arrays are passed as separate length and ptr. This
+     * is both less work and makes printf("%.*s", o.toString()) work)
+     */
+    inline bool keepUnchanged(Type* t) {
+        switch (t->ty) {
+            case Tarray:    // dynamic array
+            case Taarray:   // assoc array
+            case Tdelegate:
+                return true;
+            
+            default:
+                return false;
+        }
+    }
+    
+    enum ArgClass {
+        Integer, Sse, SseUp, X87, X87Up, ComplexX87, NoClass, Memory
+    };
+    
+    struct Classification {
+        bool isMemory;
+        ArgClass classes[2];
+        
+        Classification() : isMemory(false) {
+            classes[0] = NoClass;
+            classes[1] = NoClass;
+        }
+        
+        void addField(unsigned offset, ArgClass cl) {
+            if (isMemory)
+                return;
+            
+            // Note that we don't need to bother checking if it crosses 8 bytes.
+            // We don't get here with unaligned fields, and anything that can be
+            // big enough to cross 8 bytes (cdoubles, reals, structs and arrays)
+            // is special-cased in classifyType()
+            int idx = (offset < 8 ? 0 : 1);
+            
+            ArgClass nw = merge(classes[idx], cl);
+            if (nw != classes[idx]) {
+                classes[idx] = nw;
+                
+                if (nw == Memory) {
+                    classes[1-idx] = Memory;
+                    isMemory = true;
+                }
+            }
+        }
+        
+    private:
+        ArgClass merge(ArgClass accum, ArgClass cl) {
+            if (accum == cl)
+                return accum;
+            if (accum == NoClass)
+                return cl;
+            if (cl == NoClass)
+                return accum;
+            if (accum == Memory || cl == Memory)
+                return Memory;
+            if (accum == Integer || cl == Integer)
+                return Integer;
+            if (accum == X87 || accum == X87Up || accum == ComplexX87 ||
+                cl == X87 || cl == X87Up || cl == ComplexX87)
+                return Memory;
+            return Sse;
+        }
+    };
+    
+    void classifyType(Classification& accum, Type* ty, d_uns64 offset) {
+        if (Logger::enabled())
+            Logger::cout() << "Classifying " << ty->toChars() << " @ " << offset << '\n';
+        
+        ty = ty->toBasetype();
+        
+        if (ty->isintegral() || ty->ty == Tpointer) {
+            accum.addField(offset, Integer);
+        } else if (ty->ty == Tfloat80 || ty->ty == Timaginary80) {
+            accum.addField(offset, X87);
+            accum.addField(offset+8, X87Up);
+        } else if (ty->ty == Tcomplex80) {
+            accum.addField(offset, ComplexX87);
+            // make sure other half knows about it too:
+            accum.addField(offset+16, ComplexX87);
+        } else if (ty->ty == Tcomplex64) {
+            accum.addField(offset, Sse);
+            accum.addField(offset+8, Sse);
+        } else if (ty->ty == Tcomplex32) {
+            accum.addField(offset, Sse);
+            accum.addField(offset+4, Sse);
+        } else if (ty->isfloating()) {
+            accum.addField(offset, Sse);
+        } else if (ty->size() > 16 || hasUnalignedFields(ty)) {
+            // This isn't creal, yet is > 16 bytes, so pass in memory.
+            // Must be after creal case but before arrays and structs,
+            // the other types that can get bigger than 16 bytes
+            accum.addField(offset, Memory);
+        } else if (ty->ty == Tsarray) {
+            d_uns64 eltsize = ty->next->size();
+            if (eltsize > 0) {
+                d_uns64 dim = ty->size() / eltsize;
+                assert(dim <= 16
+                        && "Array of non-empty type <= 16 bytes but > 16 elements?");
+                for (d_uns64 i = 0; i < dim; i++) {
+                    classifyType(accum, ty->next, offset);
+                    offset += eltsize;
+                }
+            }
+        } else if (ty->ty == Tstruct) {
+            Array* fields = &((TypeStruct*) ty)->sym->fields;
+            for (size_t i = 0; i < fields->dim; i++) {
+                VarDeclaration* field = (VarDeclaration*) fields->data[i];
+                classifyType(accum, field->type, offset + field->offset);
+            }
+        } else {
+            if (Logger::enabled())
+                Logger::cout() << "x86-64 ABI: Implicitly handled type: "
+                               << ty->toChars() << '\n';
+            // arrays, delegates, etc. (pointer-sized fields, <= 16 bytes)
+            assert(offset == 0 || offset == 8 
+                    && "must be aligned and doesn't fit otherwise");
+            assert(ty->size() % 8 == 0 && "Not a multiple of pointer size?");
+            
+            accum.addField(offset, Integer);
+            if (ty->size() > 8)
+                accum.addField(offset+8, Integer);
+        }
+    }
+    
+    Classification classify(Type* ty) {
+        typedef std::map<Type*, Classification> ClassMap;
+        static ClassMap cache;
+        
+        ClassMap::iterator it = cache.find(ty);
+        if (it != cache.end()) {
+            return it->second;
+        } else {
+            Classification cl;
+            classifyType(cl, ty, 0);
+            cache[ty] = cl;
+            return cl;
+        }
+    }
+    
+    /// Returns the type to pass as, or null if no transformation is needed.
+    LLType* getAbiType(Type* ty) {
+        ty = ty->toBasetype();
+        
+        // First, check if there's any need of a transformation:
+        
+        if (keepUnchanged(ty))
+            return 0;
+        
+        if (ty->ty != Tcomplex32 && ty->ty != Tstruct)
+            return 0; // Nothing to do,
+        
+        Classification cl = classify(ty);
+        assert(!cl.isMemory);
+        
+        if (cl.classes[0] == NoClass) {
+            assert(cl.classes[1] == NoClass && "Non-empty struct with empty first half?");
+            return 0; // Empty structs should also be handled correctly by LLVM
+        }
+        
+        // Okay, we may need to transform. Figure out a canonical type:
+        
+        std::vector<const LLType*> parts;
+        
+        unsigned size = ty->size();
+        
+        switch (cl.classes[0]) {
+            case Integer: {
+                unsigned bits = (size >= 8 ? 64 : (size * 8));
+                parts.push_back(LLIntegerType::get(bits));
+                break;
+            }
+            
+            case Sse:
+                parts.push_back(size <= 4 ? LLType::FloatTy : LLType::DoubleTy);
+                break;
+            
+            case X87:
+                assert(cl.classes[1] == X87Up && "Upper half of real not X87Up?");
+                /// The type only contains a single real/ireal field,
+                /// so just use that type.
+                return const_cast<LLType*>(LLType::X86_FP80Ty);
+            
+            default:
+                assert(0 && "Unanticipated argument class");
+        }
+        
+        switch(cl.classes[1]) {
+            case NoClass:
+                assert(parts.size() == 1);
+                // No need to use a single-element struct type.
+                // Just use the element type instead.
+                return const_cast<LLType*>(parts[0]);
+                break;
+            
+            case Integer: {
+                assert(size > 8);
+                unsigned bits = (size - 8) * 8;
+                parts.push_back(LLIntegerType::get(bits));
+                break;
+            }
+            case Sse:
+                parts.push_back(size <= 12 ? LLType::FloatTy : LLType::DoubleTy);
+                break;
+            
+            case X87Up:
+                if(cl.classes[0] == X87) {
+                    // This won't happen: it was short-circuited while
+                    // processing the first half.
+                } else {                    
+                    // I can't find this anywhere in the ABI documentation,
+                    // but this is what gcc does (both regular and llvm-gcc).
+                    // (This triggers for types like union { real r; byte b; })
+                    parts.push_back(LLType::DoubleTy);
+                }
+                break;
+            
+            default:
+                assert(0 && "Unanticipated argument class for second half");
+        }
+        return LLStructType::get(parts);
+    }
+}
+
+
+// Implementation details for extern(D)
+namespace x86_64_D_cc {
+    struct DRegCount {
+        unsigned ints;
+        unsigned sse;
+        unsigned x87;
+        
+        DRegCount(unsigned ints_, unsigned sse_, unsigned x87_)
+        : ints(ints_), sse(sse_), x87(x87_) {}
+    };
+    
+    // Count the number of registers needed for a simple type.
+    // (Not a struct or static array)
+    DRegCount regsNeededForSimpleType(Type* t) {
+        DRegCount r(0, 0, 0);
+        switch(t->ty) {
+            case Tstruct:
+            case Tsarray:
+                assert(0 && "Not a simple type!");
+                // Return huge numbers if assertions are disabled, so it'll always get
+                // bumped to memory.
+                r.ints = r.sse = r.x87 = (unsigned)-1;
+                break;
+            
+            // Floats, doubles and such are passed in SSE registers
+            case Tfloat32:
+            case Tfloat64:
+            case Timaginary32:
+            case Timaginary64:
+                r.sse = 1;
+                break;
+            
+            case Tcomplex32:
+            case Tcomplex64:
+                r.sse = 2;
+                break;
+            
+            // Reals, ireals and creals are passed in x87 registers
+            case Tfloat80:
+            case Timaginary80:
+                r.x87 = 1;
+                break;
+            
+            case Tcomplex80:
+                r.x87 = 2;
+                break;
+            
+            // Anything else is passed in one or two integer registers,
+            // depending on its size.
+            default: {
+                int needed = (t->size() + 7) / 8;
+                assert(needed <= 2);
+                r.ints = needed;
+                break;
+            }
+        }
+        return r;
+    }
+    
+    // Returns true if it's possible (and a good idea) to pass the struct in the
+    // specified number of registers.
+    // (May return false if it's a bad idea to pass the type in registers for
+    // reasons other than it not fitting)
+    // Note that if true is returned, 'left' is also modified to contain the
+    // number of registers left. This property is used in the recursive case.
+    // If false is returned, 'left' is garbage.
+    bool shouldPassStructInRegs(TypeStruct* t, DRegCount& left) {
+        // If it has unaligned fields, there's probably a reason for it,
+        // so keep it in memory.
+        if (hasUnalignedFields(t))
+            return false;
+        
+        Array* fields = &t->sym->fields;
+        d_uns64 nextbyte = 0;
+        for (d_uns64 i = 0; i < fields->dim; i++) {
+            VarDeclaration* field = (VarDeclaration*) fields->data[i];
+            
+            // This depends on ascending order of field offsets in structs
+            // without overlapping fields.
+            if (field->offset < nextbyte) {
+                // Don't return unions (or structs containing them) in registers.
+                return false;
+            }
+            nextbyte = field->offset + field->type->size();
+            
+            switch (field->type->ty) {
+                case Tstruct:
+                    if (!shouldPassStructInRegs((TypeStruct*) field->type, left))
+                        return false;
+                    break;
+                
+                case Tsarray:
+                    // Don't return static arrays in registers
+                    // (indexing registers doesn't work well)
+                    return false;
+                
+                default: {
+                    DRegCount needed = regsNeededForSimpleType(field->type);
+                    if (needed.ints > left.ints || needed.sse > left.sse || needed.x87 > left.x87)
+                        return false;
+                    left.ints -= needed.ints;
+                    left.sse -= needed.sse;
+                    left.x87 -= needed.x87;
+                    break;
+                }
+            }
+        }
+        return true;
+    }
+    
+    // Returns true if the struct fits in return registers in the x86-64 fastcc
+    // calling convention.
+    bool retStructInRegs(TypeStruct* st) {
+        // 'fastcc' allows returns in up to two registers of each kind:
+        DRegCount state(2, 2, 2);
+        return shouldPassStructInRegs(st, state);
+    }
+    
+    // Heuristic for determining whether to pass a struct type directly or
+    // bump it to memory.
+    bool passStructTypeDirectly(TypeStruct* st) {
+        // If the type fits in a reasonable number of registers,
+        // pass it directly.
+        // This does not necessarily mean it will actually be passed in
+        // registers. For example, x87 registers are never actually used for
+        // parameters.
+        DRegCount state(2, 2, 2);
+        return shouldPassStructInRegs(st, state);
+        
+        // This doesn't work well: Since the register count can differ depending
+        // on backend options, there's no way to be exact anyway.
+        /*
+        // Regular fastcc:      6 int, 8 sse, 0 x87
+        // fastcc + tailcall:   5 int, 8 sse, 0 x87
+        RegCount state(5, 8, 0);
+        */
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+
+/// Just store to memory and it's readable as the other type.
+struct X86_64_C_struct_rewrite : ABIRewrite {
+    // Get struct from ABI-mangled representation
+    LLValue* get(Type* dty, DValue* v)
+    {
+        LLValue* lval;
+        if (v->isLVal()) {
+            lval = v->getLVal();
+        } else {
+            // No memory location, create one.
+            LLValue* rval = v->getRVal();
+            lval = DtoAlloca(rval->getType());
+            DtoStore(rval, lval);
+        }
+        
+        const LLType* pTy = getPtrToType(DtoType(dty));
+        return DtoLoad(DtoBitCast(lval, pTy), "get-result");
+    }
+    
+    // Get struct from ABI-mangled representation, and store in the provided location.
+    void getL(Type* dty, DValue* v, llvm::Value* lval) {
+        LLValue* rval = v->getRVal();
+        const LLType* pTy = getPtrToType(rval->getType());
+        DtoStore(rval, DtoBitCast(lval, pTy));
+    }
+    
+    // Turn a struct into an ABI-mangled representation
+    LLValue* put(Type* dty, DValue* v)
+    {
+        LLValue* lval;
+        if (v->isLVal()) {
+            lval = v->getLVal();
+        } else {
+            // No memory location, create one.
+            LLValue* rval = v->getRVal();
+            lval = DtoAlloca(rval->getType());
+            DtoStore(rval, lval);
+        }
+        
+        LLType* abiTy = getAbiType(dty);
+        assert(abiTy && "Why are we rewriting a non-rewritten type?");
+        
+        const LLType* pTy = getPtrToType(abiTy);
+        return DtoLoad(DtoBitCast(lval, pTy), "put-result");
+    }
+    
+    /// should return the transformed type for this rewrite
+    const LLType* type(Type* dty, const LLType* t)
+    {
+        return getAbiType(dty);
+    }
+};
+
+
+struct RegCount {
+    unsigned char int_regs, sse_regs;
+};
+
+
+struct X86_64TargetABI : TargetABI {
+    X86_64_C_struct_rewrite struct_rewrite;
+    
+    void newFunctionType(TypeFunction* tf) {
+        funcTypeStack.push_back(FuncTypeData(tf->linkage));
+    }
+    
+    bool returnInArg(TypeFunction* tf);
+    
+    bool passByVal(Type* t);
+    
+    void rewriteFunctionType(TypeFunction* tf);
+    
+    void doneWithFunctionType() {
+        funcTypeStack.pop_back();
+    }
+    
+private:
+    struct FuncTypeData {
+        LINK linkage;       // Linkage of the function type currently under construction
+        RegCount state;     // bookkeeping for extern(C) parameter registers
+        
+        FuncTypeData(LINK linkage_)
+        : linkage(linkage_)
+        {
+            state.int_regs = 6;
+            state.sse_regs = 8;
+        }
+    };
+    std::vector<FuncTypeData> funcTypeStack;
+    
+    LINK linkage() {
+        assert(funcTypeStack.size() != 0);
+        return funcTypeStack.back().linkage;
+    }
+    
+    RegCount& state() {
+        assert(funcTypeStack.size() != 0);
+        return funcTypeStack.back().state;
+    }
+    
+    void fixup(IrFuncTyArg& arg);
+};
+
+
+// The public getter for abi.cpp
+TargetABI* getX86_64TargetABI() {
+    return new X86_64TargetABI;
+}
+
+
+bool X86_64TargetABI::returnInArg(TypeFunction* tf) {
+    assert(linkage() == tf->linkage);
+    Type* rt = tf->next->toBasetype();
+    
+    if (tf->linkage == LINKd) {
+        assert(rt->ty != Tsarray && "Update calling convention for static array returns");
+        
+        // All non-structs can be returned in registers.
+        if (rt->ty != Tstruct)
+            return false;
+        
+        // Try to figure out whether the struct fits in return registers
+        // and whether it's a good idea to put it there.
+        return !x86_64_D_cc::retStructInRegs((TypeStruct*) rt);
+    } else {
+        if (rt == Type::tvoid || keepUnchanged(rt))
+            return false;
+        
+        Classification cl = classify(rt);
+        return cl.isMemory;
+    }
+}
+
+bool X86_64TargetABI::passByVal(Type* t) {
+    if (linkage() == LINKd) {
+        if (t->ty != Tstruct)
+            return false;
+        
+        // Try to be smart about which structs are passed in memory.
+        return !x86_64_D_cc::passStructTypeDirectly((TypeStruct*) t);
+    } else {
+        // This implements the C calling convention for x86-64.
+        // It might not be correct for other calling conventions.
+        Classification cl = classify(t);
+        if (cl.isMemory)
+            return true;
+        
+        // Figure out how many registers we want for this arg:
+        RegCount wanted = { 0, 0 };
+        for (int i = 0 ; i < 2; i++) {
+            if (cl.classes[i] == Integer)
+                wanted.int_regs++;
+            else if (cl.classes[i] == Sse)
+                wanted.sse_regs++;
+        }
+        
+        // See if they're available:
+        RegCount& state = this->state();
+        if (wanted.int_regs <= state.int_regs && wanted.sse_regs <= state.sse_regs) {
+            state.int_regs -= wanted.int_regs;
+            state.sse_regs -= wanted.sse_regs;
+        } else {
+            if (keepUnchanged(t)) {
+                // Not enough registers available, but this is passed as if it's
+                // multiple arguments. Just use the registers there are,
+                // automatically spilling the rest to memory.
+                if (wanted.int_regs > state.int_regs)
+                    state.int_regs = 0;
+                else
+                    state.int_regs -= wanted.int_regs;
+                
+                if (wanted.sse_regs > state.sse_regs)
+                    state.sse_regs = 0;
+                else
+                    state.sse_regs -= wanted.sse_regs;
+            } else if (t->iscomplex() || t->ty == Tstruct) {
+                // Spill entirely to memory, even if some of the registers are
+                // available.
+                
+                // FIXME: Don't do this if *none* of the wanted registers are available,
+                //        (i.e. only when absolutely necessary for abi-compliance)
+                //        so it gets alloca'd by the callee and -scalarrepl can
+                //        more easily break it up?
+                // Note: this won't be necessary if the following LLVM bug gets fixed:
+                //       http://llvm.org/bugs/show_bug.cgi?id=3741
+                return true;
+            } else {
+                assert(t == Type::tfloat80 || t == Type::timaginary80 || t->size() < 8
+                    && "What other big types are there?"); // other than static arrays...
+                // In any case, they shouldn't be represented as structs in LLVM:
+                assert(!isaStruct(DtoType(t)));
+            }
+        }
+        // Everything else that's passed in memory is handled by LLVM.
+        return false;
+    }
+}
+
+// Helper function for rewriteFunctionType.
+// Return type and parameters are passed here (unless they're already in memory)
+// to get the rewrite applied (if necessary).
+void X86_64TargetABI::fixup(IrFuncTyArg& arg) {
+    LLType* abiTy = getAbiType(arg.type);
+    
+    if (abiTy && abiTy != arg.ltype) {
+        assert(arg.type == Type::tcomplex32 || arg.type->ty == Tstruct);
+        arg.ltype = abiTy;
+        arg.rewrite = &struct_rewrite;
+    }
+}
+
+void X86_64TargetABI::rewriteFunctionType(TypeFunction* tf) {
+    // extern(D) is handled entirely by passByVal and returnInArg
+    
+    if (tf->linkage != LINKd) {
+        // TODO: See if this is correct for more than just extern(C).
+        
+        IrFuncTy* fty = tf->fty;
+        
+        if (!fty->arg_sret) {
+            Logger::println("x86-64 ABI: Transforming return type");
+            Type* rt = fty->ret->type->toBasetype();
+            if (rt != Type::tvoid)
+                fixup(*fty->ret);
+        }
+        
+        
+        Logger::println("x86-64 ABI: Transforming arguments");
+        LOG_SCOPE;
+        
+        for (IrFuncTy::ArgIter I = fty->args.begin(), E = fty->args.end(); I != E; ++I) {
+            IrFuncTyArg& arg = **I;
+            
+            if (Logger::enabled())
+                Logger::cout() << "Arg: " << arg.type->toChars() << '\n';
+            
+            // Arguments that are in memory are of no interest to us.
+            if (arg.byref)
+                continue;
+            
+            Type* ty = arg.type->toBasetype();
+            
+            fixup(arg);
+            
+            if (Logger::enabled())
+                Logger::cout() << "New arg type: " << *arg.ltype << '\n';
+        }
+    }
+}
diff --git a/gen/abi-x86-64.h b/gen/abi-x86-64.h
new file mode 100644
index 00000000..d5ad76cc
--- /dev/null
+++ b/gen/abi-x86-64.h
@@ -0,0 +1,10 @@
+#ifndef __LDC_GEN_ABI_X86_64_H__
+#define __LDC_GEN_ABI_X86_64_H__
+
+#include "gen/abi.h"
+
+
+TargetABI* getX86_64TargetABI();
+
+
+#endif
diff --git a/gen/abi.cpp b/gen/abi.cpp
index 3ff740f7..2810264c 100644
--- a/gen/abi.cpp
+++ b/gen/abi.cpp
@@ -261,101 +261,7 @@ struct X86TargetABI : TargetABI
 //////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
 
-struct X86_64_cfloat_rewrite : ABIRewrite
-{
-    // {double} -> {float,float}
-    LLValue* get(Type*, DValue* dv)
-    {
-        LLValue* in = dv->getRVal();
-
-        // extract double
-        LLValue* v = gIR->ir->CreateExtractValue(in, 0);
-        // cast to i64
-        v = gIR->ir->CreateBitCast(v, LLType::Int64Ty);
-
-        // extract real part
-        LLValue* rpart = gIR->ir->CreateTrunc(v, LLType::Int32Ty);
-        rpart = gIR->ir->CreateBitCast(rpart, LLType::FloatTy, ".re");
-
-        // extract imag part
-        LLValue* ipart = gIR->ir->CreateLShr(v, LLConstantInt::get(LLType::Int64Ty, 32, false));
-        ipart = gIR->ir->CreateTrunc(ipart, LLType::Int32Ty);
-        ipart = gIR->ir->CreateBitCast(ipart, LLType::FloatTy, ".im");
-
-        // return {float,float} aggr pair with same bits
-        return DtoAggrPair(rpart, ipart, ".final_cfloat");
-    }
-
-    // {float,float} -> {double}
-    LLValue* put(Type*, DValue* dv)
-    {
-        LLValue* v = dv->getRVal();
-
-        // extract real
-        LLValue* r = gIR->ir->CreateExtractValue(v, 0);
-        // cast to i32
-        r = gIR->ir->CreateBitCast(r, LLType::Int32Ty);
-        // zext to i64
-        r = gIR->ir->CreateZExt(r, LLType::Int64Ty);
-
-        // extract imag
-        LLValue* i = gIR->ir->CreateExtractValue(v, 1);
-        // cast to i32
-        i = gIR->ir->CreateBitCast(i, LLType::Int32Ty);
-        // zext to i64
-        i = gIR->ir->CreateZExt(i, LLType::Int64Ty);
-        // shift up
-        i = gIR->ir->CreateShl(i, LLConstantInt::get(LLType::Int64Ty, 32, false));
-
-        // combine
-        v = gIR->ir->CreateOr(r, i);
-
-        // cast to double
-        v = gIR->ir->CreateBitCast(v, LLType::DoubleTy);
-
-        // return {double}
-        const LLType* t = LLStructType::get(LLType::DoubleTy, NULL);
-        LLValue* undef = llvm::UndefValue::get(t);
-        return gIR->ir->CreateInsertValue(undef, v, 0);
-    }
-
-    // {float,float} -> {double}
-    const LLType* type(Type*, const LLType* t)
-    {
-        return LLStructType::get(LLType::DoubleTy, NULL);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////////
-
-struct X86_64TargetABI : TargetABI
-{
-    X86_64_cfloat_rewrite cfloat_rewrite;
-
-    bool returnInArg(TypeFunction* tf)
-    {
-        Type* rt = tf->next->toBasetype();
-        return (rt->ty == Tstruct);
-    }
-
-    bool passByVal(Type* t)
-    {
-        return t->toBasetype()->ty == Tstruct;
-    }
-
-    void rewriteFunctionType(TypeFunction* tf)
-    {
-        IrFuncTy* fty = tf->fty;
-        Type* rt = fty->ret->type->toBasetype();
-
-        // rewrite cfloat return for !extern(D)
-        if (tf->linkage != LINKd && rt == Type::tcomplex32)
-        {
-            fty->ret->rewrite = &cfloat_rewrite;
-            fty->ret->ltype = cfloat_rewrite.type(fty->ret->type, fty->ret->ltype);
-        }
-    }
-};
+#include "gen/abi-x86-64.h"
 
 //////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
@@ -395,7 +301,7 @@ TargetABI * TargetABI::getTarget()
     case ARCHx86:
         return new X86TargetABI;
     case ARCHx86_64:
-        return new X86_64TargetABI;
+        return getX86_64TargetABI();
     default:
         Logger::cout() << "WARNING: Unknown ABI, guessing...\n";
         return new UnknownTargetABI;
diff --git a/gen/abi.h b/gen/abi.h
index 494e34ca..5602aa43 100644
--- a/gen/abi.h
+++ b/gen/abi.h
@@ -35,8 +35,10 @@ struct TargetABI
 {
     static TargetABI* getTarget();
 
+    virtual void newFunctionType(TypeFunction* tf) {}
     virtual bool returnInArg(TypeFunction* tf) = 0;
     virtual bool passByVal(Type* t) = 0;
+    virtual void doneWithFunctionType() {}
 
     virtual void rewriteFunctionType(TypeFunction* t) = 0;
 };
diff --git a/gen/classes.cpp b/gen/classes.cpp
index 5eb97f8e..2c863937 100644
--- a/gen/classes.cpp
+++ b/gen/classes.cpp
@@ -957,6 +957,7 @@ DValue* DtoNewClass(Loc loc, TypeClass* tc, NewExp* newexp)
     // call constructor
     if (newexp->member)
     {
+        Logger::println("Calling constructor");
         assert(newexp->arguments != NULL);
         DtoForceDeclareDsymbol(newexp->member);
         DFuncValue dfn(newexp->member, newexp->member->ir.irFunc->func, mem);
diff --git a/gen/functions.cpp b/gen/functions.cpp
index ce4b248b..2041e9ac 100644
--- a/gen/functions.cpp
+++ b/gen/functions.cpp
@@ -24,6 +24,9 @@
 
 const llvm::FunctionType* DtoFunctionType(Type* type, Type* thistype, Type* nesttype, bool ismain)
 {
+    if (Logger::enabled())
+        Logger::println("DtoFunctionType(%s)", type->toChars());
+    LOG_SCOPE
     // sanity check
     assert(type->ty == Tfunction);
     TypeFunction* f = (TypeFunction*)type;
@@ -34,6 +37,9 @@ const llvm::FunctionType* DtoFunctionType(Type* type, Type* thistype, Type* nest
         return llvm::cast<llvm::FunctionType>(type->ir.type->get());
     }
 
+    // Tell the ABI we're resolving a new function type
+    gABI->newFunctionType(f);
+
     // create new ir funcTy
     assert(f->fty == NULL);
     f->fty = new IrFuncTy();
@@ -158,6 +164,9 @@ const llvm::FunctionType* DtoFunctionType(Type* type, Type* thistype, Type* nest
     // let the abi rewrite the types as necesary
     gABI->rewriteFunctionType(f);
 
+    // Tell the ABI we're done with this function type
+    gABI->doneWithFunctionType();
+
     // build the function type
     std::vector<const LLType*> argtypes;
     argtypes.reserve(lidx);
@@ -184,6 +193,8 @@ const llvm::FunctionType* DtoFunctionType(Type* type, Type* thistype, Type* nest
     llvm::FunctionType* functype = llvm::FunctionType::get(f->fty->ret->ltype, argtypes, f->fty->c_vararg);
     f->ir.type = new llvm::PATypeHolder(functype);
 
+    Logger::cout() << "Final function type: " << *functype << "\n";
+
     return functype;
 }
 
@@ -571,7 +582,8 @@ void DtoDefineFunction(FuncDeclaration* fd)
 
     assert(fd->ir.declared);
 
-    Logger::println("DtoDefineFunc(%s): %s", fd->toPrettyChars(), fd->loc.toChars());
+    if (Logger::enabled())
+        Logger::println("DtoDefineFunc(%s): %s", fd->toPrettyChars(), fd->loc.toChars());
     LOG_SCOPE;
 
     // if this function is naked, we take over right away! no standard processing!
diff --git a/gen/naked.cpp b/gen/naked.cpp
index b31e1510..72fe0cf0 100644
--- a/gen/naked.cpp
+++ b/gen/naked.cpp
@@ -168,13 +168,6 @@ void DtoDefineNakedFunction(FuncDeclaration* fd)
 
 //////////////////////////////////////////////////////////////////////////////////////////
 
-static LLValue* x86_64_cfloatRetFixup(IRBuilderHelper b, LLValue* orig) {
-    assert(orig->getType() == LLType::DoubleTy);
-    LLType* retty = LLStructType::get(LLType::DoubleTy, NULL);
-    LLValue* undef = llvm::UndefValue::get(retty);
-    return b->CreateInsertValue(undef, orig, 0, "asm.ret");
-}
-
 void emitABIReturnAsmStmt(IRAsmBlock* asmblock, Loc loc, FuncDeclaration* fdecl)
 {
     Logger::println("emitABIReturnAsmStmt(%s)", fdecl->mangle());
@@ -186,6 +179,9 @@ void emitABIReturnAsmStmt(IRAsmBlock* asmblock, Loc loc, FuncDeclaration* fdecl)
     asmblock->retty = llretTy;
     asmblock->retn = 1;
 
+    // FIXME: This should probably be handled by the TargetABI somehow.
+    //        It should be able to do this for a greater variety of types.
+
     // x86
     if (global.params.cpu == ARCHx86)
     {
@@ -293,7 +289,6 @@ void emitABIReturnAsmStmt(IRAsmBlock* asmblock, Loc loc, FuncDeclaration* fdecl)
                 // extern(C) cfloat -> %xmm0 (extract two floats)
                 as->out_c = "={xmm0},";
                 asmblock->retty = LLType::DoubleTy;
-                asmblock->retfixup = &x86_64_cfloatRetFixup;
             } else if (rt->iscomplex()) {
                 // cdouble and extern(D) cfloat -> re=%xmm0, im=%xmm1
                 as->out_c = "={xmm0},={xmm1},";
diff --git a/gen/statements.cpp b/gen/statements.cpp
index 180f5467..8537366d 100644
--- a/gen/statements.cpp
+++ b/gen/statements.cpp
@@ -97,6 +97,15 @@ void ReturnStatement::toIR(IRState* p)
             if (Logger::enabled())
                 Logger::cout() << "return value is '" <<*v << "'\n";
 
+            IrFunction* f = p->func();
+            // Hack around LDC assuming structs are in memory:
+            // If the function returns a struct, and the return value is a
+            // pointer to a struct, load from it before returning.
+            if (f->type->next->ty == Tstruct && isaPointer(v->getType())) {
+                Logger::println("Loading struct type for return");
+                v = DtoLoad(v);
+            }
+
             // can happen for classes and void main
             if (v->getType() != p->topfunc()->getReturnType())
             {
diff --git a/gen/tocall.cpp b/gen/tocall.cpp
index 98dab18f..b55e0f5c 100644
--- a/gen/tocall.cpp
+++ b/gen/tocall.cpp
@@ -217,6 +217,11 @@ void DtoBuildDVarArgList(std::vector<LLValue*>& args, std::vector<llvm::Attribut
 
 DValue* DtoCallFunction(Loc& loc, Type* resulttype, DValue* fnval, Expressions* arguments)
 {
+    if (Logger::enabled()) {
+        Logger::println("DtoCallFunction()");
+    }
+    LOG_SCOPE
+
     // the callee D type
     Type* calleeType = fnval->getType();
 
@@ -386,6 +391,15 @@ DValue* DtoCallFunction(Loc& loc, Type* resulttype, DValue* fnval, Expressions*
 
             int j = tf->fty->reverseParams ? beg + n - i - 1 : beg + i;
 
+            // Hack around LDC assuming structs are in memory:
+            // If the function wants a struct, and the argument value is a
+            // pointer to a struct, load from it before passing it in.
+            if (argval->getType()->ty == Tstruct
+                    && isaPointer(arg) && !isaPointer(callableTy->getParamType(j))) {
+                Logger::println("Loading struct type for function argument");
+                arg = DtoLoad(arg);
+            }
+
             // parameter type mismatch, this is hard to get rid of
             if (arg->getType() != callableTy->getParamType(j))
             {
@@ -468,24 +482,24 @@ DValue* DtoCallFunction(Loc& loc, Type* resulttype, DValue* fnval, Expressions*
     // get return value
     LLValue* retllval = (retinptr) ? args[0] : call.getInstruction();
 
-    if (tf->linkage == LINKintrinsic)
-    {
-        // Ignore ABI for intrinsics
-        Type* rettype = tf->next;
-        if (rettype->ty == Tstruct) {
-            // LDC assumes structs are in memory, so put it there.
-            LLValue* mem = DtoAlloca(retllval->getType());
-            DtoStore(retllval, mem);
-            retllval = mem;
-        }
-    }
-    else if (!retinptr)
+    // Ignore ABI for intrinsics
+    if (tf->linkage != LINKintrinsic && !retinptr)
     {
         // do abi specific return value fixups
         DImValue dretval(tf->next, retllval);
         retllval = tf->fty->getRet(tf->next, &dretval);
     }
 
+    // Hack around LDC assuming structs are in memory:
+    // If the function returns a struct, and the return value is not a
+    // pointer to a struct, store it to a stack slot before continuing.
+    if (tf->next->ty == Tstruct && !isaPointer(retllval)) {
+        Logger::println("Storing return value to stack slot");
+        LLValue* mem = DtoAlloca(retllval->getType());
+        DtoStore(retllval, mem);
+        retllval = mem;
+    }
+
     // repaint the type if necessary
     if (resulttype)
     {
diff --git a/ir/irfunction.h b/ir/irfunction.h
index 951929e7..d4b50efd 100644
--- a/ir/irfunction.h
+++ b/ir/irfunction.h
@@ -64,7 +64,9 @@ struct IrFuncTy : IrBase
     IrFuncTyArg* arg_argptr;
 
     // normal explicit arguments
-    LLSmallVector<IrFuncTyArg*, 4> args;
+    typedef LLSmallVector<IrFuncTyArg*, 4> ArgList;
+    typedef ArgList::iterator ArgIter;
+    ArgList args;
 
     // C varargs
     bool c_vararg;