view gen/abi-x86-64.cpp @ 1348:f3b92d26ad0f

Revert [1346] for now, it breaks because of padding :(
author Frits van Bommel <fvbommel wxs.nl>
date Tue, 12 May 2009 16:53:43 +0200
parents 6f4154b318ef
children 15e9762bb620
line wrap: on
line source

/* TargetABI implementation for x86-64.
 * Written for LDC by Frits van Bommel in 2009.
 * 
 * extern(D) follows no particular external ABI, but tries to be smart about
 * passing structs and returning them. It should probably be reviewed if the
 * way LLVM implements fastcc on this platform ever changes.
 * (Specifically, the number of return registers of various types is hardcoded)
 * 
 * 
 * extern(C) implements the C calling convention for x86-64, as found in
 * http://www.x86-64.org/documentation/abi-0.99.pdf
 * 
 * Note:
 *   Where a discrepancy was found between llvm-gcc and the ABI documentation,
 *   llvm-gcc behavior was used for compatibility (after it was verified that
 *   regular gcc has the same behavior).
 * 
 * LLVM gets it right for most types, but complex numbers and structs need some
 * help. To make sure it gets those right we essentially bitcast small structs
 * to a type to which LLVM assigns the appropriate registers, and pass that
 * instead. Structs that are required to be passed in memory are explicitly
 * marked with the ByVal attribute to ensure no part of them ends up in
 * registers when only a subset of the desired registers are available.
 * 
 * We don't perform the same transformation for D-specific types that contain
 * multiple parts, such as dynamic arrays and delegates. They're passed as if
 * the parts were passed as separate parameters. This helps make things like
 * printf("%.*s", o.toString()) work as expected; if we didn't do this that
 * wouldn't work if there were 4 other integer/pointer arguments before the
 * toString() call because the string got bumped to memory with one integer
 * register still free. Keeping it untransformed puts the length in a register
 * and the pointer in memory, as printf expects it.
 */

#include "mtype.h"
#include "declaration.h"
#include "aggregate.h"

#include "gen/llvm.h"
#include "gen/tollvm.h"
#include "gen/logger.h"
#include "gen/dvalue.h"
#include "gen/llvmhelpers.h"
#include "gen/abi.h"
#include "gen/abi-x86-64.h"
//#include "gen/llvm-version.h"         // only use is commented out.
#include "ir/irfunction.h"

#include <cassert>
#include <map>
#include <string>
#include <utility>

// Implementation details for extern(C)
namespace {
    /**
     * This function helps filter out things that look like structs to C,
     * but should be passed to C in separate arguments anyway.
     * 
     * (e.g. dynamic arrays are passed as separate length and ptr. This
     * is both less work and makes printf("%.*s", o.toString()) work)
     */
    inline bool keepUnchanged(Type* t) {
        switch (t->ty) {
            case Tarray:    // dynamic array
            case Taarray:   // assoc array
            case Tdelegate:
                return true;
            
            default:
                return false;
        }
    }
    
    enum ArgClass {
        Integer, Sse, SseUp, X87, X87Up, ComplexX87, NoClass, Memory
    };
    
    struct Classification {
        bool isMemory;
        ArgClass classes[2];
        
        Classification() : isMemory(false) {
            classes[0] = NoClass;
            classes[1] = NoClass;
        }
        
        void addField(unsigned offset, ArgClass cl) {
            if (isMemory)
                return;
            
            // Note that we don't need to bother checking if it crosses 8 bytes.
            // We don't get here with unaligned fields, and anything that can be
            // big enough to cross 8 bytes (cdoubles, reals, structs and arrays)
            // is special-cased in classifyType()
            int idx = (offset < 8 ? 0 : 1);
            
            ArgClass nw = merge(classes[idx], cl);
            if (nw != classes[idx]) {
                classes[idx] = nw;
                
                if (nw == Memory) {
                    classes[1-idx] = Memory;
                    isMemory = true;
                }
            }
        }
        
    private:
        ArgClass merge(ArgClass accum, ArgClass cl) {
            if (accum == cl)
                return accum;
            if (accum == NoClass)
                return cl;
            if (cl == NoClass)
                return accum;
            if (accum == Memory || cl == Memory)
                return Memory;
            if (accum == Integer || cl == Integer)
                return Integer;
            if (accum == X87 || accum == X87Up || accum == ComplexX87 ||
                cl == X87 || cl == X87Up || cl == ComplexX87)
                return Memory;
            return Sse;
        }
    };
    
    void classifyType(Classification& accum, Type* ty, d_uns64 offset) {
        if (Logger::enabled())
            Logger::cout() << "Classifying " << ty->toChars() << " @ " << offset << '\n';
        
        ty = ty->toBasetype();
        
        if (ty->isintegral() || ty->ty == Tpointer) {
            accum.addField(offset, Integer);
        } else if (ty->ty == Tfloat80 || ty->ty == Timaginary80) {
            accum.addField(offset, X87);
            accum.addField(offset+8, X87Up);
        } else if (ty->ty == Tcomplex80) {
            accum.addField(offset, ComplexX87);
            // make sure other half knows about it too:
            accum.addField(offset+16, ComplexX87);
        } else if (ty->ty == Tcomplex64) {
            accum.addField(offset, Sse);
            accum.addField(offset+8, Sse);
        } else if (ty->ty == Tcomplex32) {
            accum.addField(offset, Sse);
            accum.addField(offset+4, Sse);
        } else if (ty->isfloating()) {
            accum.addField(offset, Sse);
        } else if (ty->size() > 16 || hasUnalignedFields(ty)) {
            // This isn't creal, yet is > 16 bytes, so pass in memory.
            // Must be after creal case but before arrays and structs,
            // the other types that can get bigger than 16 bytes
            accum.addField(offset, Memory);
        } else if (ty->ty == Tsarray) {
            Type* eltType = ty->nextOf();
            d_uns64 eltsize = eltType->size();
            if (eltsize > 0) {
                d_uns64 dim = ty->size() / eltsize;
                assert(dim <= 16
                        && "Array of non-empty type <= 16 bytes but > 16 elements?");
                for (d_uns64 i = 0; i < dim; i++) {
                    classifyType(accum, eltType, offset);
                    offset += eltsize;
                }
            }
        } else if (ty->ty == Tstruct) {
            Array* fields = &((TypeStruct*) ty)->sym->fields;
            for (size_t i = 0; i < fields->dim; i++) {
                VarDeclaration* field = (VarDeclaration*) fields->data[i];
                classifyType(accum, field->type, offset + field->offset);
            }
        } else {
            if (Logger::enabled())
                Logger::cout() << "x86-64 ABI: Implicitly handled type: "
                               << ty->toChars() << '\n';
            // arrays, delegates, etc. (pointer-sized fields, <= 16 bytes)
            assert(offset == 0 || offset == 8 
                    && "must be aligned and doesn't fit otherwise");
            assert(ty->size() % 8 == 0 && "Not a multiple of pointer size?");
            
            accum.addField(offset, Integer);
            if (ty->size() > 8)
                accum.addField(offset+8, Integer);
        }
    }
    
    Classification classify(Type* ty) {
        typedef std::map<Type*, Classification> ClassMap;
        static ClassMap cache;
        
        ClassMap::iterator it = cache.find(ty);
        if (it != cache.end()) {
            return it->second;
        } else {
            Classification cl;
            classifyType(cl, ty, 0);
            cache[ty] = cl;
            return cl;
        }
    }
    
    /// Returns the type to pass as, or null if no transformation is needed.
    LLType* getAbiType(Type* ty) {
        ty = ty->toBasetype();
        
        // First, check if there's any need of a transformation:
        
        if (keepUnchanged(ty))
            return 0;
        
        if (ty->ty != Tcomplex32 && ty->ty != Tstruct)
            return 0; // Nothing to do,
        
        Classification cl = classify(ty);
        assert(!cl.isMemory);
        
        if (cl.classes[0] == NoClass) {
            assert(cl.classes[1] == NoClass && "Non-empty struct with empty first half?");
            return 0; // Empty structs should also be handled correctly by LLVM
        }
        
        // Okay, we may need to transform. Figure out a canonical type:
        
        std::vector<const LLType*> parts;
        
        unsigned size = ty->size();
        
        switch (cl.classes[0]) {
            case Integer: {
                unsigned bits = (size >= 8 ? 64 : (size * 8));
                parts.push_back(LLIntegerType::get(bits));
                break;
            }
            
            case Sse:
                parts.push_back(size <= 4 ? LLType::FloatTy : LLType::DoubleTy);
                break;
            
            case X87:
                assert(cl.classes[1] == X87Up && "Upper half of real not X87Up?");
                /// The type only contains a single real/ireal field,
                /// so just use that type.
                return const_cast<LLType*>(LLType::X86_FP80Ty);
            
            default:
                assert(0 && "Unanticipated argument class");
        }
        
        switch(cl.classes[1]) {
            case NoClass:
                assert(parts.size() == 1);
                // No need to use a single-element struct type.
                // Just use the element type instead.
                return const_cast<LLType*>(parts[0]);
                break;
            
            case Integer: {
                assert(size > 8);
                unsigned bits = (size - 8) * 8;
                parts.push_back(LLIntegerType::get(bits));
                break;
            }
            case Sse:
                parts.push_back(size <= 12 ? LLType::FloatTy : LLType::DoubleTy);
                break;
            
            case X87Up:
                if(cl.classes[0] == X87) {
                    // This won't happen: it was short-circuited while
                    // processing the first half.
                } else {                    
                    // I can't find this anywhere in the ABI documentation,
                    // but this is what gcc does (both regular and llvm-gcc).
                    // (This triggers for types like union { real r; byte b; })
                    parts.push_back(LLType::DoubleTy);
                }
                break;
            
            default:
                assert(0 && "Unanticipated argument class for second half");
        }
        return LLStructType::get(parts);
    }
}


// Implementation details for extern(D)
namespace x86_64_D_cc {
    struct DRegCount {
        unsigned ints;
        unsigned sse;
        unsigned x87;
        
        DRegCount(unsigned ints_, unsigned sse_, unsigned x87_)
        : ints(ints_), sse(sse_), x87(x87_) {}
    };
    
    // Count the number of registers needed for a simple type.
    // (Not a struct or static array)
    DRegCount regsNeededForSimpleType(Type* t) {
        DRegCount r(0, 0, 0);
        switch(t->ty) {
            case Tstruct:
            case Tsarray:
                assert(0 && "Not a simple type!");
                // Return huge numbers if assertions are disabled, so it'll always get
                // bumped to memory.
                r.ints = r.sse = r.x87 = (unsigned)-1;
                break;
            
            // Floats, doubles and such are passed in SSE registers
            case Tfloat32:
            case Tfloat64:
            case Timaginary32:
            case Timaginary64:
                r.sse = 1;
                break;
            
            case Tcomplex32:
            case Tcomplex64:
                r.sse = 2;
                break;
            
            // Reals, ireals and creals are passed in x87 registers
            case Tfloat80:
            case Timaginary80:
                r.x87 = 1;
                break;
            
            case Tcomplex80:
                r.x87 = 2;
                break;
            
            // Anything else is passed in one or two integer registers,
            // depending on its size.
            default: {
                int needed = (t->size() + 7) / 8;
                assert(needed <= 2);
                r.ints = needed;
                break;
            }
        }
        return r;
    }
    
    // Returns true if it's possible (and a good idea) to pass the struct in the
    // specified number of registers.
    // (May return false if it's a bad idea to pass the type in registers for
    // reasons other than it not fitting)
    // Note that if true is returned, 'left' is also modified to contain the
    // number of registers left. This property is used in the recursive case.
    // If false is returned, 'left' is garbage.
    bool shouldPassStructInRegs(TypeStruct* t, DRegCount& left) {
        // If it has unaligned fields, there's probably a reason for it,
        // so keep it in memory.
        if (hasUnalignedFields(t))
            return false;
        
        Array* fields = &t->sym->fields;
        d_uns64 nextbyte = 0;
        for (d_uns64 i = 0; i < fields->dim; i++) {
            VarDeclaration* field = (VarDeclaration*) fields->data[i];
            
            // This depends on ascending order of field offsets in structs
            // without overlapping fields.
            if (field->offset < nextbyte) {
                // Don't return unions (or structs containing them) in registers.
                return false;
            }
            nextbyte = field->offset + field->type->size();
            
            switch (field->type->ty) {
                case Tstruct:
                    if (!shouldPassStructInRegs((TypeStruct*) field->type, left))
                        return false;
                    break;
                
                case Tsarray:
                    // Don't return static arrays in registers
                    // (indexing registers doesn't work well)
                    return false;
                
                default: {
                    DRegCount needed = regsNeededForSimpleType(field->type);
                    if (needed.ints > left.ints || needed.sse > left.sse || needed.x87 > left.x87)
                        return false;
                    left.ints -= needed.ints;
                    left.sse -= needed.sse;
                    left.x87 -= needed.x87;
                    break;
                }
            }
        }
        return true;
    }
    
    // Returns true if the struct fits in return registers in the x86-64 fastcc
    // calling convention.
    bool retStructInRegs(TypeStruct* st) {
        // 'fastcc' allows returns in up to two registers of each kind:
        DRegCount state(2, 2, 2);
    #if 1 //LLVM_REV < 67588
        // (If uncommenting the LLVM_REV line above, also uncomment llvm-version #include
        
        // LLVM before trunk r67588 doesn't allow a second int to be an i1 or
        // i8. (See <http://llvm.org/PR3861>)
        // Rather than complicating shouldPassStructInRegs(), just disallow
        // second integers for now.
        // FIXME: Disabling this for older LLVM only makes the abi dependent on
        //        LLVM revision, which seems like a bad idea. We could extend
        //        i8 parts to i16 to work around this issue until 2.6...
        // TODO: Remove this workaround when support for LLVM 2.5 is dropped.
        state.ints = 1;
    #endif
        return shouldPassStructInRegs(st, state);
    }
    
    // Heuristic for determining whether to pass a struct type directly or
    // bump it to memory.
    bool passStructTypeDirectly(TypeStruct* st) {
        // If the type fits in a reasonable number of registers,
        // pass it directly.
        // This does not necessarily mean it will actually be passed in
        // registers. For example, x87 registers are never actually used for
        // parameters.
        DRegCount state(2, 2, 2);
        return shouldPassStructInRegs(st, state);
        
        // This doesn't work well: Since the register count can differ depending
        // on backend options, there's no way to be exact anyway.
        /*
        // Regular fastcc:      6 int, 8 sse, 0 x87
        // fastcc + tailcall:   5 int, 8 sse, 0 x87
        RegCount state(5, 8, 0);
        */
    }
}

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////


/// Just store to memory and it's readable as the other type.
struct X86_64_C_struct_rewrite : ABIRewrite {
    // Get struct from ABI-mangled representation
    LLValue* get(Type* dty, DValue* v)
    {
        LLValue* lval;
        if (v->isLVal()) {
            lval = v->getLVal();
        } else {
            // No memory location, create one.
            LLValue* rval = v->getRVal();
            lval = DtoAlloca(rval->getType());
            DtoStore(rval, lval);
        }
        
        const LLType* pTy = getPtrToType(DtoType(dty));
        return DtoLoad(DtoBitCast(lval, pTy), "get-result");
    }
    
    // Get struct from ABI-mangled representation, and store in the provided location.
    void getL(Type* dty, DValue* v, llvm::Value* lval) {
        LLValue* rval = v->getRVal();
        const LLType* pTy = getPtrToType(rval->getType());
        DtoStore(rval, DtoBitCast(lval, pTy));
    }
    
    // Turn a struct into an ABI-mangled representation
    LLValue* put(Type* dty, DValue* v)
    {
        LLValue* lval;
        if (v->isLVal()) {
            lval = v->getLVal();
        } else {
            // No memory location, create one.
            LLValue* rval = v->getRVal();
            lval = DtoAlloca(rval->getType());
            DtoStore(rval, lval);
        }
        
        LLType* abiTy = getAbiType(dty);
        assert(abiTy && "Why are we rewriting a non-rewritten type?");
        
        const LLType* pTy = getPtrToType(abiTy);
        return DtoLoad(DtoBitCast(lval, pTy), "put-result");
    }
    
    /// should return the transformed type for this rewrite
    const LLType* type(Type* dty, const LLType* t)
    {
        return getAbiType(dty);
    }
};


struct RegCount {
    unsigned char int_regs, sse_regs;
};


struct X86_64TargetABI : TargetABI {
    X86_64_C_struct_rewrite struct_rewrite;
    
    void newFunctionType(TypeFunction* tf) {
        funcTypeStack.push_back(FuncTypeData(tf->linkage));
    }
    
    bool returnInArg(TypeFunction* tf);
    
    bool passByVal(Type* t);
    
    void rewriteFunctionType(TypeFunction* tf);
    
    void doneWithFunctionType() {
        funcTypeStack.pop_back();
    }
    
private:
    struct FuncTypeData {
        LINK linkage;       // Linkage of the function type currently under construction
        RegCount state;     // bookkeeping for extern(C) parameter registers
        
        FuncTypeData(LINK linkage_)
        : linkage(linkage_)
        {
            state.int_regs = 6;
            state.sse_regs = 8;
        }
    };
    std::vector<FuncTypeData> funcTypeStack;
    
    LINK linkage() {
        assert(funcTypeStack.size() != 0);
        return funcTypeStack.back().linkage;
    }
    
    RegCount& state() {
        assert(funcTypeStack.size() != 0);
        return funcTypeStack.back().state;
    }
    
    void fixup(IrFuncTyArg& arg);
};


// The public getter for abi.cpp
TargetABI* getX86_64TargetABI() {
    return new X86_64TargetABI;
}


bool X86_64TargetABI::returnInArg(TypeFunction* tf) {
    assert(linkage() == tf->linkage);
    Type* rt = tf->next->toBasetype();
    
    if (tf->linkage == LINKd) {
        assert(rt->ty != Tsarray && "Update calling convention for static array returns");
        
        // All non-structs can be returned in registers.
        if (rt->ty != Tstruct)
            return false;
        
        // Try to figure out whether the struct fits in return registers
        // and whether it's a good idea to put it there.
        return !x86_64_D_cc::retStructInRegs((TypeStruct*) rt);
    } else {
        if (rt == Type::tvoid || keepUnchanged(rt))
            return false;
        
        Classification cl = classify(rt);
        return cl.isMemory;
    }
}

bool X86_64TargetABI::passByVal(Type* t) {
    t = t->toBasetype();
    if (linkage() == LINKd) {
        if (t->ty != Tstruct)
            return false;
        
        // Try to be smart about which structs are passed in memory.
        return !x86_64_D_cc::passStructTypeDirectly((TypeStruct*) t);
    } else {
        // This implements the C calling convention for x86-64.
        // It might not be correct for other calling conventions.
        Classification cl = classify(t);
        if (cl.isMemory)
            return true;
        
        // Figure out how many registers we want for this arg:
        RegCount wanted = { 0, 0 };
        for (int i = 0 ; i < 2; i++) {
            if (cl.classes[i] == Integer)
                wanted.int_regs++;
            else if (cl.classes[i] == Sse)
                wanted.sse_regs++;
        }
        
        // See if they're available:
        RegCount& state = this->state();
        if (wanted.int_regs <= state.int_regs && wanted.sse_regs <= state.sse_regs) {
            state.int_regs -= wanted.int_regs;
            state.sse_regs -= wanted.sse_regs;
        } else {
            if (keepUnchanged(t)) {
                // Not enough registers available, but this is passed as if it's
                // multiple arguments. Just use the registers there are,
                // automatically spilling the rest to memory.
                if (wanted.int_regs > state.int_regs)
                    state.int_regs = 0;
                else
                    state.int_regs -= wanted.int_regs;
                
                if (wanted.sse_regs > state.sse_regs)
                    state.sse_regs = 0;
                else
                    state.sse_regs -= wanted.sse_regs;
            } else if (t->iscomplex() || t->ty == Tstruct) {
                // Spill entirely to memory, even if some of the registers are
                // available.
                
                // FIXME: Don't do this if *none* of the wanted registers are available,
                //        (i.e. only when absolutely necessary for abi-compliance)
                //        so it gets alloca'd by the callee and -scalarrepl can
                //        more easily break it up?
                // Note: this won't be necessary if the following LLVM bug gets fixed:
                //       http://llvm.org/bugs/show_bug.cgi?id=3741
                return true;
            } else {
                assert(t == Type::tfloat80 || t == Type::timaginary80 || t->size() <= 8
                    && "What other big types are there?"); // other than static arrays...
                // In any case, they shouldn't be represented as structs in LLVM:
                assert(!isaStruct(DtoType(t)));
            }
        }
        // Everything else that's passed in memory is handled by LLVM.
        return false;
    }
}

// Helper function for rewriteFunctionType.
// Return type and parameters are passed here (unless they're already in memory)
// to get the rewrite applied (if necessary).
void X86_64TargetABI::fixup(IrFuncTyArg& arg) {
    LLType* abiTy = getAbiType(arg.type);
    
    if (abiTy && abiTy != arg.ltype) {
        assert(arg.type == Type::tcomplex32 || arg.type->ty == Tstruct);
        arg.ltype = abiTy;
        arg.rewrite = &struct_rewrite;
    }
}

void X86_64TargetABI::rewriteFunctionType(TypeFunction* tf) {
    // extern(D) is handled entirely by passByVal and returnInArg
    
    if (tf->linkage != LINKd) {
        // TODO: See if this is correct for more than just extern(C).
        
        IrFuncTy& fty = tf->fty;
        
        if (!fty.arg_sret) {
            Logger::println("x86-64 ABI: Transforming return type");
            Type* rt = fty.ret->type->toBasetype();
            if (rt != Type::tvoid)
                fixup(*fty.ret);
        }
        
        
        Logger::println("x86-64 ABI: Transforming arguments");
        LOG_SCOPE;
        
        for (IrFuncTy::ArgIter I = fty.args.begin(), E = fty.args.end(); I != E; ++I) {
            IrFuncTyArg& arg = **I;
            
            if (Logger::enabled())
                Logger::cout() << "Arg: " << arg.type->toChars() << '\n';
            
            // Arguments that are in memory are of no interest to us.
            if (arg.byref)
                continue;
            
            Type* ty = arg.type->toBasetype();
            
            fixup(arg);
            
            if (Logger::enabled())
                Logger::cout() << "New arg type: " << *arg.ltype << '\n';
        }
    }
}