changeset 945:03d7c4aac654

SWITCHED TO LLVM 2.5 ! Applied patch from ticket #129 to compile against latest LLVM. Thanks Frits van Bommel. Fixed implicit return by asm block at the end of a function on x86-32. Other architectures will produce an error at the moment. Adding support for new targets is fairly simple. Fixed return calling convention for complex numbers, ST and ST(1) were switched around. Added some testcases. I've run a dstress test and there are no regressions. However, the runtime does not seem to compile with symbolic debug information. -O3 -release -inline works well and is what I used for the dstress run. Tango does not compile, a small workaround is needed in tango.io.digest.Digest.Digest.hexDigest. See ticket #206 .
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Sun, 08 Feb 2009 05:26:54 +0100
parents eb310635d80e
children 1714836f2c0b
files dmd/declaration.h dmd/func.c dmd/idgen.c dmd/statement.c dmd/statement.h dmd2/declaration.h dmd2/func.c dmd2/idgen.c dmd2/statement.c dmd2/statement.h gen/aa.cpp gen/arrays.cpp gen/asmstmt.cpp gen/classes.cpp gen/functions.cpp gen/functions.h gen/irstate.h gen/llvmhelpers.cpp gen/naked.cpp gen/statements.cpp gen/structs.cpp gen/tocall.cpp gen/tollvm.cpp gen/tollvm.h gen/toobj.cpp ir/irstruct.cpp tests/mini/asm3.d tests/mini/asm5.d tests/mini/asm8.d
diffstat 29 files changed, 404 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/dmd/declaration.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd/declaration.h	Sun Feb 08 05:26:54 2009 +0100
@@ -663,6 +663,9 @@
 
     // if this is an array operation it gets a little special attention
     bool isArrayOp;
+
+    // true if overridden with the pragma(allow_inline); stmt
+    bool allowInlining;
 };
 
 struct FuncAliasDeclaration : FuncDeclaration
--- a/dmd/func.c	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd/func.c	Sun Feb 08 05:26:54 2009 +0100
@@ -80,6 +80,7 @@
 
     // LDC
     isArrayOp = false;
+    allowInlining = false;
 }
 
 Dsymbol *FuncDeclaration::syntaxCopy(Dsymbol *s)
--- a/dmd/idgen.c	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd/idgen.c	Sun Feb 08 05:26:54 2009 +0100
@@ -224,6 +224,7 @@
     { "vaend", "va_end" },
     { "vaarg", "va_arg" },
     { "ldc" },
+    { "allow_inline" },
 
     // For special functions
     { "tohash", "toHash" },
--- a/dmd/statement.c	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd/statement.c	Sun Feb 08 05:26:54 2009 +0100
@@ -2146,6 +2146,13 @@
 	}
 #endif
     }
+
+    // LDC
+    else if (ident == Id::allow_inline)
+    {
+        sc->func->allowInlining = true;
+    }
+
     else
         error("unrecognized pragma(%s)", ident->toChars());
 
--- a/dmd/statement.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd/statement.h	Sun Feb 08 05:26:54 2009 +0100
@@ -161,15 +161,16 @@
     // Back end
     virtual void toIR(IRState *irs);
 
-    // LDC
-    virtual void toNakedIR(IRState *irs);
-
     // Avoid dynamic_cast
     virtual DeclarationStatement *isDeclarationStatement() { return NULL; }
     virtual CompoundStatement *isCompoundStatement() { return NULL; }
     virtual ReturnStatement *isReturnStatement() { return NULL; }
     virtual IfStatement *isIfStatement() { return NULL; }
     virtual CaseStatement* isCaseStatement() { return NULL; }
+
+    // LDC
+    virtual void toNakedIR(IRState *irs);
+    virtual AsmBlockStatement* endsWithAsm();
 };
 
 struct ExpStatement : Statement
@@ -242,6 +243,7 @@
 
     // LDC
     virtual void toNakedIR(IRState *irs);
+    virtual AsmBlockStatement* endsWithAsm();
 
     virtual CompoundStatement *isCompoundStatement() { return this; }
 };
@@ -905,6 +907,9 @@
 
     void toIR(IRState *irs);
     void toNakedIR(IRState *irs);
+    AsmBlockStatement* endsWithAsm();
+
+    llvm::Value* abiret;
 };
 
 #endif /* DMD_STATEMENT_H */
--- a/dmd2/declaration.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd2/declaration.h	Sun Feb 08 05:26:54 2009 +0100
@@ -675,6 +675,9 @@
 
     // if this is an array operation it gets a little special attention
     bool isArrayOp;
+
+    // true if overridden with the pragma(allow_inline); stmt
+    bool allowInlining;
 };
 
 struct FuncAliasDeclaration : FuncDeclaration
--- a/dmd2/func.c	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd2/func.c	Sun Feb 08 05:26:54 2009 +0100
@@ -80,6 +80,7 @@
 
     // LDC
     isArrayOp = false;
+    allowInlining = false;
 }
 
 Dsymbol *FuncDeclaration::syntaxCopy(Dsymbol *s)
--- a/dmd2/idgen.c	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd2/idgen.c	Sun Feb 08 05:26:54 2009 +0100
@@ -240,6 +240,7 @@
     { "vaend", "va_end" },
     { "vaarg", "va_arg" },
     { "ldc" },
+    { "allow_inline" },
 
     // For special functions
     { "tohash", "toHash" },
--- a/dmd2/statement.c	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd2/statement.c	Sun Feb 08 05:26:54 2009 +0100
@@ -2197,6 +2197,13 @@
 	condition = new AssignExp(loc, v, condition);
 	condition = condition->semantic(scd);
     }
+
+    // LDC
+    else if (ident == Id::allow_inline)
+    {
+        sc->func->allowInlining = true;
+    }
+
     else
 	scd = sc->push();
     ifbody = ifbody->semantic(scd);
--- a/dmd2/statement.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/dmd2/statement.h	Sun Feb 08 05:26:54 2009 +0100
@@ -162,15 +162,16 @@
     // Back end
     virtual void toIR(IRState *irs);
 
-    // LDC
-    virtual void toNakedIR(IRState *irs);
-
     // Avoid dynamic_cast
     virtual DeclarationStatement *isDeclarationStatement() { return NULL; }
     virtual CompoundStatement *isCompoundStatement() { return NULL; }
     virtual ReturnStatement *isReturnStatement() { return NULL; }
     virtual IfStatement *isIfStatement() { return NULL; }
     virtual CaseStatement* isCaseStatement() { return NULL; }
+
+    // LDC
+    virtual void toNakedIR(IRState *irs);
+    virtual AsmBlockStatement* endsWithAsm();
 };
 
 struct ExpStatement : Statement
@@ -245,6 +246,7 @@
 
     // LDC
     virtual void toNakedIR(IRState *irs);
+    virtual AsmBlockStatement* endsWithAsm();
 
     virtual CompoundStatement *isCompoundStatement() { return this; }
 };
@@ -941,6 +943,9 @@
 
     void toIR(IRState *irs);
     virtual void toNakedIR(IRState *irs);
+    AsmBlockStatement* endsWithAsm();
+
+    llvm::Value* abiret;
 };
 
 #endif /* DMD_STATEMENT_H */
--- a/gen/aa.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/aa.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -77,7 +77,7 @@
     keyti = DtoBitCast(keyti, funcTy->getParamType(1));
 
     // valuesize param
-    LLValue* valsize = DtoConstSize_t(getABITypeSize(DtoType(type)));
+    LLValue* valsize = DtoConstSize_t(getTypePaddedSize(DtoType(type)));
 
     // pkey param
     LLValue* pkey = to_pkey(loc, key);
--- a/gen/arrays.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/arrays.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -100,7 +100,7 @@
     // this simplifies codegen later on as llvm null's have no address!
     if (isaConstant(val) && isaConstant(val)->isNullValue())
     {
-        size_t X = getABITypeSize(val->getType());
+        size_t X = getTypePaddedSize(val->getType());
         LLValue* nbytes = gIR->ir->CreateMul(dim, DtoConstSize_t(X), ".nbytes");
         DtoMemSetZero(ptr, nbytes);
         return;
@@ -181,7 +181,7 @@
         assert(arrayelemty == valuety && "ArrayInit doesn't work on elem-initialized static arrays");
         args[0] = DtoBitCast(args[0], getVoidPtrType());
         args[2] = DtoBitCast(args[2], getVoidPtrType());
-        args.push_back(DtoConstSize_t(getABITypeSize(DtoType(arrayelemty))));
+        args.push_back(DtoConstSize_t(getTypePaddedSize(DtoType(arrayelemty))));
         break;
 
     default:
@@ -331,7 +331,7 @@
 {
     assert(e->len != 0);
     const LLType* t = e->ptr->getType()->getContainedType(0);
-    sz = gIR->ir->CreateMul(DtoConstSize_t(getABITypeSize(t)), e->len, "tmp");
+    sz = gIR->ir->CreateMul(DtoConstSize_t(getTypePaddedSize(t)), e->len, "tmp");
     return e->ptr;
 }
 
@@ -362,7 +362,7 @@
 {
     Logger::println("StaticArrayCopy");
 
-    size_t n = getABITypeSize(dst->getType()->getContainedType(0));
+    size_t n = getTypePaddedSize(dst->getType()->getContainedType(0));
     DtoMemCpy(dst, src, DtoConstSize_t(n));
 }
 
@@ -534,7 +534,7 @@
     src1 = gIR->ir->CreateGEP(src1,len1,"tmp");
 
     // memcpy
-    LLValue* elemSize = DtoConstSize_t(getABITypeSize(src2->getType()->getContainedType(0)));
+    LLValue* elemSize = DtoConstSize_t(getTypePaddedSize(src2->getType()->getContainedType(0)));
     LLValue* bytelen = gIR->ir->CreateMul(len2, elemSize, "tmp");
     DtoMemCpy(src1,src2,bytelen);
 
@@ -570,7 +570,7 @@
     src2 = DtoArrayPtr(e2);
 
     // first memcpy
-    LLValue* elemSize = DtoConstSize_t(getABITypeSize(src1->getType()->getContainedType(0)));
+    LLValue* elemSize = DtoConstSize_t(getTypePaddedSize(src1->getType()->getContainedType(0)));
     LLValue* bytelen = gIR->ir->CreateMul(len1, elemSize, "tmp");
     DtoMemCpy(mem,src1,bytelen);
 
@@ -613,7 +613,7 @@
 
         mem = gIR->ir->CreateGEP(mem,DtoConstSize_t(1),"tmp");
 
-        LLValue* elemSize = DtoConstSize_t(getABITypeSize(src1->getType()->getContainedType(0)));
+        LLValue* elemSize = DtoConstSize_t(getTypePaddedSize(src1->getType()->getContainedType(0)));
         LLValue* bytelen = gIR->ir->CreateMul(len1, elemSize, "tmp");
         DtoMemCpy(mem,src1,bytelen);
 
@@ -632,7 +632,7 @@
 
         src1 = DtoArrayPtr(e1);
 
-        LLValue* elemSize = DtoConstSize_t(getABITypeSize(src1->getType()->getContainedType(0)));
+        LLValue* elemSize = DtoConstSize_t(getTypePaddedSize(src1->getType()->getContainedType(0)));
         LLValue* bytelen = gIR->ir->CreateMul(len1, elemSize, "tmp");
         DtoMemCpy(mem,src1,bytelen);
 
@@ -769,8 +769,8 @@
     assert(elemty);
     assert(newelemty);
 
-    size_t esz = getABITypeSize(elemty);
-    size_t nsz = getABITypeSize(newelemty);
+    size_t esz = getTypePaddedSize(elemty);
+    size_t nsz = getTypePaddedSize(newelemty);
     if (esz == nsz)
         return len;
 
--- a/gen/asmstmt.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/asmstmt.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -26,6 +26,7 @@
 #include "gen/logger.h"
 #include "gen/todebug.h"
 #include "gen/llvmhelpers.h"
+#include "gen/functions.h"
 
 typedef enum {
     Arg_Integer,
@@ -400,6 +401,8 @@
 {
     enclosinghandler = NULL;
     tf = NULL;
+
+    abiret = NULL;
 }
 
 // rewrite argument indices to the block scope indices
@@ -452,18 +455,21 @@
     }
 }
 
+LLValue* DtoAggrPairSwap(LLValue* aggr);
+
 void AsmBlockStatement::toIR(IRState* p)
 {
     Logger::println("AsmBlockStatement::toIR(): %s", loc.toChars());
     LOG_SCOPE;
     Logger::println("BEGIN ASM");
 
-    // disable inlining
-    gIR->func()->setNeverInline();
+    // disable inlining by default
+    if (!p->func()->decl->allowInlining)
+        p->func()->setNeverInline();
 
     // create asm block structure
     assert(!p->asmBlock);
-    IRAsmBlock* asmblock = new IRAsmBlock;
+    IRAsmBlock* asmblock = new IRAsmBlock(this);
     assert(asmblock);
     p->asmBlock = asmblock;
 
@@ -562,6 +568,20 @@
     }
 
 
+    // build a fall-off-end-properly asm statement
+
+    FuncDeclaration* thisfunc = p->func()->decl;
+    bool useabiret = false;
+    p->asmBlock->asmBlock->abiret = NULL;
+    if (thisfunc->fbody->endsWithAsm() == this && thisfunc->type->nextOf()->ty != Tvoid)
+    {
+        // there can't be goto forwarders in this case
+        assert(gotoToVal.empty());
+        emitABIReturnAsmStmt(asmblock, loc, thisfunc);
+        useabiret = true;
+    }
+
+
     // build asm block
     std::vector<LLValue*> outargs;
     std::vector<LLValue*> inargs;
@@ -571,8 +591,9 @@
     std::string in_c;
     std::string clobbers;
     std::string code;
-    size_t asmIdx = 0;
+    size_t asmIdx = asmblock->retn;
 
+    Logger::println("do outputs");
     size_t n = asmblock->s.size();
     for (size_t i=0; i<n; ++i)
     {
@@ -590,6 +611,8 @@
         }
         remap_outargs(a->code, onn+a->in.size(), asmIdx);
     }
+
+    Logger::println("do inputs");
     for (size_t i=0; i<n; ++i)
     {
         IRAsmStmt* a = asmblock->s[i];
@@ -628,10 +651,18 @@
     Logger::println("code = \"%s\"", code.c_str());
     Logger::println("constraints = \"%s\"", out_c.c_str());
 
+    // build return types
+    const LLType* retty;
+    if (asmblock->retn)
+        retty = asmblock->retty;
+    else
+        retty = llvm::Type::VoidTy;
+
+    // build argument types
     std::vector<const LLType*> types;
     types.insert(types.end(), outtypes.begin(), outtypes.end());
     types.insert(types.end(), intypes.begin(), intypes.end());
-    llvm::FunctionType* fty = llvm::FunctionType::get(llvm::Type::VoidTy, types, false);
+    llvm::FunctionType* fty = llvm::FunctionType::get(retty, types, false);
     if (Logger::enabled())
         Logger::cout() << "function type = " << *fty << '\n';
     llvm::InlineAsm* ia = llvm::InlineAsm::get(fty, code, out_c, true);
@@ -640,6 +671,10 @@
     args.insert(args.end(), outargs.begin(), outargs.end());
     args.insert(args.end(), inargs.begin(), inargs.end());
     llvm::CallInst* call = p->ir->CreateCall(ia, args.begin(), args.end(), "");
+    if (useabiret)
+    {
+        p->asmBlock->asmBlock->abiret = call;
+    }
 
     p->asmBlock = NULL;
     Logger::println("END ASM");
--- a/gen/classes.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/classes.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -973,8 +973,8 @@
 {
     DtoForceConstInitDsymbol(tc->sym);
 
-    size_t presz = 2*getABITypeSize(DtoSize_t());
-    uint64_t n = getABITypeSize(tc->ir.type->get()) - presz;
+    size_t presz = 2*getTypePaddedSize(DtoSize_t());
+    uint64_t n = getTypePaddedSize(tc->ir.type->get()) - presz;
 
     // set vtable field seperately, this might give better optimization
     assert(tc->sym->ir.irStruct->vtbl);
@@ -1494,7 +1494,7 @@
     {
         c = DtoBitCast(ir->init, voidPtr);
         //Logger::cout() << *ir->constInit->getType() << std::endl;
-        size_t initsz = getABITypeSize(ir->init->getType()->getContainedType(0));
+        size_t initsz = getTypePaddedSize(ir->init->getType()->getContainedType(0));
         c = DtoConstSlice(DtoConstSize_t(initsz), c);
     }
     inits.push_back(c);
--- a/gen/functions.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/functions.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -907,6 +907,10 @@
     // output function body
     fd->fbody->toIR(gIR);
 
+    // TODO: clean up this mess
+
+//     std::cout << *func << std::endl;
+
     // llvm requires all basic blocks to end with a TerminatorInst but DMD does not put a return statement
     // in automatically, so we do it here.
     if (!gIR->scopereturned()) {
@@ -917,12 +921,23 @@
         }
         else {
             if (!fd->isMain())
-                llvm::ReturnInst::Create(llvm::UndefValue::get(func->getReturnType()), gIR->scopebb());
+            {
+                AsmBlockStatement* asmb = fd->fbody->endsWithAsm();
+                if (asmb) {
+                    assert(asmb->abiret);
+                    llvm::ReturnInst::Create(asmb->abiret, gIR->scopebb());
+                }
+                else {
+                    llvm::ReturnInst::Create(llvm::UndefValue::get(func->getReturnType()), gIR->scopebb());
+                }
+            }
             else
                 llvm::ReturnInst::Create(llvm::Constant::getNullValue(func->getReturnType()), gIR->scopebb());
         }
     }
 
+//     std::cout << *func << std::endl;
+
     // erase alloca point
     allocaPoint->eraseFromParent();
     allocaPoint = 0;
@@ -934,28 +949,9 @@
     assert(!func->getBasicBlockList().empty());
     func->getBasicBlockList().pop_back();
 
-    // if the last block is empty now, it must be unreachable or it's a bug somewhere else
-    // would be nice to figure out how to assert that this is correct
-    llvm::BasicBlock* lastbb = &func->getBasicBlockList().back();
-    if (lastbb->empty())
-    {
-        new llvm::UnreachableInst(lastbb);
-    }
+    gIR->functions.pop_back();
 
-    // if the last block is not terminated we return a null value or void
-    // for some unknown reason this is needed when a void main() has a inline asm block ...
-    // this should be harmless for well formed code!
-    lastbb = &func->getBasicBlockList().back();
-    if (!lastbb->getTerminator())
-    {
-        Logger::println("adding missing return statement");
-        if (func->getReturnType() == LLType::VoidTy)
-            llvm::ReturnInst::Create(lastbb);
-        else
-            llvm::ReturnInst::Create(llvm::Constant::getNullValue(func->getReturnType()), lastbb);
-    }
-
-    gIR->functions.pop_back();
+//     std::cout << *func << std::endl;
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////
--- a/gen/functions.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/functions.h	Sun Feb 08 05:26:54 2009 +0100
@@ -9,7 +9,9 @@
 void DtoResolveFunction(FuncDeclaration* fdecl);
 void DtoDeclareFunction(FuncDeclaration* fdecl);
 void DtoDefineFunction(FuncDeclaration* fd);
+
 void DtoDefineNakedFunction(FuncDeclaration* fd);
+void emitABIReturnAsmStmt(IRAsmBlock* asmblock, Loc loc, FuncDeclaration* fdecl);
 
 DValue* DtoArgument(Argument* fnarg, Expression* argexp);
 void DtoVariadicArgument(Expression* argexp, LLValue* dst);
--- a/gen/irstate.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/irstate.h	Sun Feb 08 05:26:54 2009 +0100
@@ -2,6 +2,7 @@
 #define LDC_GEN_IRSTATE_H
 
 #include <vector>
+#include <deque>
 #include <list>
 #include <sstream>
 
@@ -78,11 +79,17 @@
 
 struct IRAsmBlock
 {
-    std::vector<IRAsmStmt*> s;
+    std::deque<IRAsmStmt*> s; // deque: the ABI-return statement is inserted at the front
     std::set<std::string> clobs;
 
     // stores the labels within the asm block
     std::vector<Identifier*> internalLabels;
+
+    AsmBlockStatement* asmBlock; // front-end statement handed to the constructor
+    const LLType* retty; // return type for the generated asm call; only consulted when retn != 0
+    unsigned retn; // number of return outputs; 0 makes the asm call return void
+
+    IRAsmBlock(AsmBlockStatement* b) : asmBlock(b), retty(NULL), retn(0) {}
 };
 
 // llvm::CallInst and llvm::InvokeInst don't share a common base
--- a/gen/llvmhelpers.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/llvmhelpers.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -1486,7 +1486,6 @@
     return exp->toConstElem(gIR);
 }
 
-
 //////////////////////////////////////////////////////////////////////////////////////////
 
 void DtoAnnotation(const char* str)
--- a/gen/naked.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/naked.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -9,6 +9,7 @@
 #include "gen/logger.h"
 #include "gen/irstate.h"
 #include "gen/llvmhelpers.h"
+#include "gen/tollvm.h"
 
 //////////////////////////////////////////////////////////////////////////////////////////
 
@@ -164,3 +165,63 @@
 
     gIR->functions.pop_back();
 }
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+void emitABIReturnAsmStmt(IRAsmBlock* asmblock, Loc loc, FuncDeclaration* fdecl)
+{
+    // Puts a statement that carries only output constraints at the front
+    // of 'asmblock' and records on the block the LLVM return type and the
+    // number of outputs chosen for fdecl's return type.
+    Logger::println("emitABIReturnAsmStmt(%s)", fdecl->mangle());
+    LOG_SCOPE;
+
+    IRAsmStmt* retStmt = new IRAsmStmt;
+
+    // a single output is assumed until a branch below raises the count
+    asmblock->retty = DtoType(fdecl->type->nextOf());
+    asmblock->retn = 1;
+
+    if (global.params.cpu != ARCHx86)
+    {
+        // x86 is the only target handled here
+        error(loc, "this target (%s) does not implement inline asm falling off the end of the function", global.params.targetTriple);
+        fatal();
+    }
+    else
+    {
+        LINK l = fdecl->linkage;
+        assert((l == LINKd || l == LINKc || l == LINKwindows) && "invalid linkage for asm implicit return");
+
+        Type* retType = fdecl->type->nextOf()->toBasetype();
+        if (retType->isintegral() || retType->ty == Tpointer || retType->ty == Tclass || retType->ty == Taarray)
+        {
+            // 8-byte results get the "A" constraint, everything else "{ax}"
+            retStmt->out_c = (retType->size() == 8) ? "=A," : "={ax},";
+        }
+        else if (retType->isfloating())
+        {
+            if (!retType->iscomplex()) {
+                retStmt->out_c = "={st},";
+            } else {
+                // a complex result takes two outputs
+                retStmt->out_c = "={st},={st(1)},";
+                asmblock->retn = 2;
+            }
+        }
+        else if (retType->ty == Tarray || retType->ty == Tdelegate)
+        {
+            // arrays and delegates take two outputs as well
+            retStmt->out_c = "={ax},={dx},";
+            asmblock->retn = 2;
+        }
+        else
+        {
+            error(loc, "unimplemented return type '%s' for implicit abi return", retType->toChars());
+            fatal();
+        }
+    }
+
+    // return values always go in the front
+    asmblock->s.push_front(retStmt);
+}
--- a/gen/statements.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/statements.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -81,6 +81,12 @@
             LLValue* v = e->getRVal();
             delete e;
 
+            // swap real/imag parts on a x87
+            if (global.params.cpu == ARCHx86 && exp->type->toBasetype()->iscomplex())
+            {
+                v = DtoAggrPairSwap(v);
+            }
+
             if (Logger::enabled())
                 Logger::cout() << "return value is '" <<*v << "'\n";
 
@@ -1198,6 +1204,9 @@
         a->code += ":";
         p->asmBlock->s.push_back(a);
         p->asmBlock->internalLabels.push_back(ident);
+
+        // disable inlining
+        gIR->func()->setNeverInline();
     }
     else
     {
@@ -1453,3 +1462,31 @@
 #if DMDV2
 STUBST(PragmaStatement);
 #endif
+
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+
+AsmBlockStatement* Statement::endsWithAsm()
+{
+    // default for statements in general: report that no asm block ends this statement
+    return NULL;
+}
+
+AsmBlockStatement* CompoundStatement::endsWithAsm()
+{
+    // A compound statement ends with asm exactly when its final inner
+    // statement does; without a final statement the answer is "no".
+    if (!statements || !statements->dim)
+        return NULL;
+
+    // the last slot may itself be null, so check before delegating
+    Statement* tail = (Statement*)statements->data[statements->dim - 1];
+    return tail ? tail->endsWithAsm() : NULL;
+}
+
+AsmBlockStatement* AsmBlockStatement::endsWithAsm()
+{
+    // an asm block trivially ends with itself, so hand back this statement
+    return this;
+}
--- a/gen/structs.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/structs.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -239,7 +239,7 @@
     }
 
     // there might still be padding after the last one, make sure that is defaulted/zeroed as well
-    size_t structsize = getABITypeSize(structtype);
+    size_t structsize = getTypePaddedSize(structtype);
 
     // if there is space before the next explicit initializer
     // FIXME: this should be handled in the loop above as well
@@ -292,7 +292,7 @@
     {
         Logger::cout() << "constant struct initializer: " << *c << '\n';
     }
-    assert(getABITypeSize(c->getType()) == structsize);
+    assert(getTypePaddedSize(c->getType()) == structsize);
     return c;
 }
 
@@ -406,7 +406,7 @@
 
     // fill out rest with default initializers
     const LLType* structtype = DtoType(sd->type);
-    size_t structsize = getABITypeSize(structtype);
+    size_t structsize = getTypePaddedSize(structtype);
 
     // FIXME: this could probably share some code with the above
     if (structsize > lastoffset+lastsize)
@@ -558,7 +558,7 @@
         printf("    index: %u offset: %u\n", v->ir.irField->index, v->ir.irField->unionOffset);
     }
 
-    unsigned llvmSize = (unsigned)getABITypeSize(ST);
+    unsigned llvmSize = (unsigned)getTypePaddedSize(ST);
     unsigned dmdSize = (unsigned)sd->type->size();
     printf("  llvm size: %u     dmd size: %u\n", llvmSize, dmdSize);
     assert(llvmSize == dmdSize);
@@ -685,7 +685,7 @@
         cmpop = llvm::ICmpInst::ICMP_NE;
 
     // call memcmp
-    size_t sz = getABITypeSize(DtoType(t));
+    size_t sz = getTypePaddedSize(DtoType(t));
     LLValue* val = DtoMemCmp(lhs->getRVal(), rhs->getRVal(), DtoConstSize_t(sz));
     return gIR->ir->CreateICmp(cmpop, val, LLConstantInt::get(val->getType(), 0, false), "tmp");
 }
--- a/gen/tocall.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/tocall.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -136,7 +136,7 @@
     {
         Expression* argexp = (Expression*)arguments->data[i];
         vtypes.push_back(DtoType(argexp->type));
-        size_t sz = getABITypeSize(vtypes.back());
+        size_t sz = getTypePaddedSize(vtypes.back());
         if (sz < PTRSIZE)
             vtypes.back() = DtoSize_t();
     }
@@ -463,6 +463,12 @@
     // get return value
     LLValue* retllval = (retinptr) ? args[0] : call->get();
 
+    // swap real/imag parts on a x87
+    if (global.params.cpu == ARCHx86 && tf->nextOf()->toBasetype()->iscomplex())
+    {
+        retllval = DtoAggrPairSwap(retllval);
+    }
+
     // repaint the type if necessary
     if (resulttype)
     {
--- a/gen/tollvm.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/tollvm.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -419,11 +419,9 @@
 {
     dst = DtoBitCast(dst,getVoidPtrType());
 
-    llvm::Function* fn;
-    if (global.params.is64bit)
-        fn = GET_INTRINSIC_DECL(memset_i64);
-    else
-        fn = GET_INTRINSIC_DECL(memset_i32);
+    const LLType* intTy = DtoSize_t();
+    llvm::Function* fn = llvm::Intrinsic::getDeclaration(gIR->module,
+        llvm::Intrinsic::memset, &intTy, 1);
 
     gIR->ir->CreateCall4(fn, dst, DtoConstUbyte(0), nbytes, DtoConstUint(0), "");
 }
@@ -435,11 +433,9 @@
     dst = DtoBitCast(dst,getVoidPtrType());
     src = DtoBitCast(src,getVoidPtrType());
 
-    llvm::Function* fn;
-    if (global.params.is64bit)
-        fn = GET_INTRINSIC_DECL(memcpy_i64);
-    else
-        fn = GET_INTRINSIC_DECL(memcpy_i32);
+    const LLType* intTy = DtoSize_t();
+    llvm::Function* fn = llvm::Intrinsic::getDeclaration(gIR->module,
+        llvm::Intrinsic::memcpy, &intTy, 1);
 
     gIR->ir->CreateCall4(fn, dst, src, nbytes, DtoConstUint(0), "");
 }
@@ -700,9 +696,9 @@
     return gTargetData->getTypeStoreSize(t);
 }
 
-size_t getABITypeSize(const LLType* t)
+size_t getTypePaddedSize(const LLType* t)
 {
-    size_t sz = gTargetData->getABITypeSize(t);
+    size_t sz = gTargetData->getTypePaddedSize(t);
     //Logger::cout() << "abi type size of: " << *t << " == " << sz << '\n';
     return sz;
 }
@@ -728,7 +724,7 @@
     {
         const LLType* T = *begin;
 
-        size_t sz = getABITypeSize(T);
+        size_t sz = getTypePaddedSize(T);
         size_t ali = getABITypeAlign(T);
         if (sz > bigSize || (sz == bigSize && ali > bigAlign))
         {
@@ -881,3 +877,11 @@
     V = DtoBitCast(V, as->getContainedType(1));
     return gIR->ir->CreateInsertValue(res, V, 1, "tmp");
 }
+
+LLValue* DtoAggrPairSwap(LLValue* aggr)
+{
+    Logger::println("swapping aggr pair");
+    LLValue* elem0 = gIR->ir->CreateExtractValue(aggr, 0); // member at index 0
+    LLValue* elem1 = gIR->ir->CreateExtractValue(aggr, 1); // member at index 1
+    return DtoAggrPair(elem1, elem0, "swapped");            // rebuild the pair in reverse order
+}
--- a/gen/tollvm.h	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/tollvm.h	Sun Feb 08 05:26:54 2009 +0100
@@ -92,7 +92,7 @@
 // type sizes
 size_t getTypeBitSize(const LLType* t);
 size_t getTypeStoreSize(const LLType* t);
-size_t getABITypeSize(const LLType* t);
+size_t getTypePaddedSize(const LLType* t);
 
 // type alignments
 unsigned char getABITypeAlign(const LLType* t);
@@ -105,6 +105,7 @@
 LLValue* DtoAggrPair(const LLType* type, LLValue* V1, LLValue* V2, const char* name = 0);
 LLValue* DtoAggrPair(LLValue* V1, LLValue* V2, const char* name = 0);
 LLValue* DtoAggrPaint(LLValue* aggr, const LLType* as);
+LLValue* DtoAggrPairSwap(LLValue* aggr);
 
 /**
  * Generates a call to llvm.memset.i32 (or i64 depending on architecture).
--- a/gen/toobj.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/gen/toobj.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -255,7 +255,7 @@
         Logger::println("Writing native asm to: %s\n", spath.c_str());
         std::string err;
         {
-            llvm::raw_fd_ostream out(spath.c_str(), err);
+            llvm::raw_fd_ostream out(spath.c_str(), false, err);
             write_asm_to_file(Target, *ir.module, out);
         }
 
--- a/ir/irstruct.cpp	Sun Feb 08 05:14:24 2009 +0100
+++ b/ir/irstruct.cpp	Sun Feb 08 05:26:54 2009 +0100
@@ -328,7 +328,7 @@
 
         // there might still be padding after the last one, make sure that is zeroed as well
         // is there space in between last last offset and this one?
-        size_t structsize = getABITypeSize(structtype);
+        size_t structsize = getTypePaddedSize(structtype);
 
         if (structsize > lastoffset+lastsize)
         {
--- a/tests/mini/asm3.d	Sun Feb 08 05:14:24 2009 +0100
+++ b/tests/mini/asm3.d	Sun Feb 08 05:26:54 2009 +0100
@@ -8,21 +8,20 @@
     printf(fmt);
     version (LLVM_InlineAsm_X86)
     {
-	asm
-    	{
-		push fmt;
-        	call printf;
-        	pop EAX;
-    	}
+        asm
+        {
+            push fmt;
+            call printf;
+            pop EAX;
+        }
     }
     else version(LLVM_InlineAsm_X86_64)
     {
         asm
         {
-                movq    RDI, fmt;
-                xor     AL, AL;
-                call    printf;
+            movq    RDI, fmt;
+            xor     AL, AL;
+            call    printf;
         }
     }
-
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mini/asm5.d	Sun Feb 08 05:26:54 2009 +0100
@@ -0,0 +1,23 @@
+int foo() // no return statement: the function body ends with the asm block
+{
+    version(X86)
+    asm { mov EAX, 42; } // main() expects this value back as the int result
+    else static assert(0, "todo");
+}
+
+ulong bar() // 64-bit result produced by the trailing asm block
+{
+    version(X86)
+    asm { mov EAX, 0xFF; mov EDX, 0xAA; } // main() expects 0xAA in the high half, 0xFF in the low half
+    else static assert(0, "todo");
+}
+
+void main()
+{
+    long l = 1; // NOTE(review): 'l' is written but never read; purpose not evident from this file
+    l = 2;
+    l = 4;
+    l = 8;
+    assert(foo() == 42);
+    assert(bar() == 0x000000AA000000FF);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mini/asm8.d	Sun Feb 08 05:26:54 2009 +0100
@@ -0,0 +1,119 @@
+int foo() // each function below ends with an asm block and has no return statement
+{
+    version(X86)
+    asm { mov EAX, 42; } // main() expects 42
+    else static assert(0, "todo");
+}
+
+ulong bar()
+{
+    version(X86)
+    asm { mov EDX, 0xAA; mov EAX, 0xFF; } // main() expects 0xAA in the high half, 0xFF in the low half
+    else static assert(0, "todo");
+}
+
+float onef()
+{
+    version(X86)
+    asm { fld1; } // pushes 1.0 onto the x87 stack; main() expects 1
+    else static assert(0, "todo");
+}
+
+double oned()
+{
+    version(X86)
+    asm { fld1; } // same as onef, with a double return type
+    else static assert(0, "todo");
+}
+
+real oner()
+{
+    version(X86)
+    asm { fld1; } // same as onef, with a real return type
+    else static assert(0, "todo");
+}
+
+real two = 2.0; // operand loaded by cr() and cr2()
+
+creal cr()
+{
+    version(X86)
+    asm { fld1; fld two; } // leaves 2 in ST(0) and 1 in ST(1); main() expects 1+2i
+    else static assert(0, "todo");
+}
+
+creal cr2() // same loads as cr(), but as a naked function with an explicit ret
+{
+    version(X86)
+    asm
+    {
+        naked;
+        fld1;
+        fld two;
+        ret;
+    }
+    else static assert(0, "todo");
+}
+
+void* vp()
+{
+    version(X86)
+    asm { mov EAX, 0x80; } // main() expects the pointer value 0x80
+    else static assert(0, "todo");
+}
+
+int[int] gaa; // populated in main() before aa() is called
+
+int[int] aa()
+{
+    version(X86)
+    asm { mov EAX, gaa; } // main() expects the result to be identical ('is') to gaa
+    else static assert(0, "todo");
+}
+
+Object gobj; // assigned in main() before ob() and dg() are called
+
+Object ob()
+{
+    version(X86)
+    asm { mov EAX, gobj; } // main() expects the result to be identical ('is') to gobj
+    else static assert(0, "todo");
+}
+
+char[] ghello = "hello world";
+
+char[] str()
+{
+    version(X86)
+    asm { lea ECX, ghello; mov EAX, [ECX]; mov EDX, [ECX+4]; } // copies the two words of ghello (offsets 0 and 4) into EAX and EDX
+    else static assert(0, "todo");
+}
+
+char[] delegate() dg()
+{
+    version(X86)
+    asm { mov EAX, gobj; lea EDX, Object.toString; } // presumably EAX = context, EDX = function address; main() calls the result
+    else static assert(0, "todo");
+}
+
+void main()
+{
+    gaa[4] = 5;
+    gobj = new Object;
+    auto adg = &gobj.toString; // NOTE(review): 'adg' is not used afterwards in this file
+
+    assert(foo() == 42);
+    assert(bar() == 0x000000AA000000FF);
+    assert(onef() == 1);
+    assert(oned() == 1);
+    assert(oner() == 1);
+    assert(cr() == 1+2i);
+    assert(cr2() == 1+2i);
+    assert(vp() == cast(void*)0x80);
+    assert(aa() is gaa);
+    assert(ob() is gobj);
+    assert(str() == "hello world");
+    assert(dg()() == "object.Object");
+}
+
+extern(C) int printf(char*, ...); // declared but not called anywhere in this file as shown