# HG changeset patch # User Frits van Bommel # Date 1244457355 -7200 # Node ID 9ed0695cb93cab3db4573fadda891a36a205f4f2 # Parent 9344fecd24f05262b2259888cd1a1d48c88a8598 Teach `-dgc2stack` to promote GC allocations in simple loops to stack allocations too. (A "simple" loop is one where the allocation isn't used in a subsequent iteration) This also means it's no longer necessary to run this pass multiple times. Running it once after inlining should now catch all cases. diff -r 9344fecd24f0 -r 9ed0695cb93c gen/optimizer.cpp --- a/gen/optimizer.cpp Sun Jun 07 23:00:53 2009 +0200 +++ b/gen/optimizer.cpp Mon Jun 08 12:35:55 2009 +0200 @@ -133,12 +133,6 @@ addPass(pm, createCFGSimplificationPass()); addPass(pm, createPruneEHPass()); addPass(pm, createFunctionAttrsPass()); - -#ifdef USE_METADATA - if (!disableLangSpecificPasses && !disableGCToStack) - addPass(pm, createGarbageCollect2Stack()); -#endif - addPass(pm, createTailCallEliminationPass()); addPass(pm, createCFGSimplificationPass()); } @@ -152,11 +146,6 @@ addPass(pm, createScalarReplAggregatesPass()); addPass(pm, createInstructionCombiningPass()); -#ifdef USE_METADATA - if (!disableLangSpecificPasses && !disableGCToStack) - addPass(pm, createGarbageCollect2Stack()); -#endif - // Inline again, to catch things like foreach delegates // passed to inlined opApply's where the function wasn't // known during the first inliner pass. @@ -165,11 +154,6 @@ // Run clean-up again. addPass(pm, createScalarReplAggregatesPass()); addPass(pm, createInstructionCombiningPass()); - -#ifdef USE_METADATA - if (!disableLangSpecificPasses && !disableGCToStack) - addPass(pm, createGarbageCollect2Stack()); -#endif } } @@ -179,9 +163,10 @@ #ifdef USE_METADATA if (!disableGCToStack) { - // Run some clean-up after the last GC to stack promotion pass. + addPass(pm, createGarbageCollect2Stack()); + // Run some clean-up + addPass(pm, createInstructionCombiningPass()); addPass(pm, createScalarReplAggregatesPass()); - addPass(pm, createInstructionCombiningPass()); addPass(pm, createCFGSimplificationPass()); } #endif diff -r 9344fecd24f0 -r 9ed0695cb93c gen/passes/GarbageCollect2Stack.cpp --- a/gen/passes/GarbageCollect2Stack.cpp Sun Jun 07 23:00:53 2009 +0200 +++ b/gen/passes/GarbageCollect2Stack.cpp Mon Jun 08 12:35:55 2009 +0200 @@ -29,11 +29,12 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/IRBuilder.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" @@ -49,10 +50,12 @@ struct Analysis { TargetData& TD; const Module& M; + DominatorTree& DT; CallGraph* CG; CallGraphNode* CGNode; const Type* getTypeFor(Value* typeinfo) const; + bool isSafeToStackAllocate(Instruction* Alloc); }; } @@ -269,8 +272,10 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); } }; char GarbageCollect2Stack::ID = 0; @@ -319,12 +324,12 @@ bool GarbageCollect2Stack::runOnFunction(Function &F) { DEBUG(DOUT << "Running -dgc2stack on function " << F.getName() << '\n'); - TargetData &TD = getAnalysis(); - const LoopInfo &LI = getAnalysis(); + TargetData& TD = getAnalysis(); + DominatorTree& DT = getAnalysis(); CallGraph* CG = getAnalysisIfAvailable(); CallGraphNode* CGNode = CG ? (*CG)[&F] : NULL; - Analysis A = { TD, *M, CG, CGNode }; + Analysis A = { TD, *M, DT, CG, CGNode }; BasicBlock& Entry = F.getEntryBlock(); @@ -332,15 +337,6 @@ bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - // We don't yet have sufficient analysis to properly determine if - // allocations will be unreferenced when the loop returns to their - // allocation point, so we're playing it safe by ignoring allocations - // in loops. - // TODO: Analyze loops too... - if (LI.getLoopFor(BB)) { - continue; - } - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { // Ignore non-calls. Instruction* Inst = I++; @@ -374,7 +370,7 @@ DEBUG(DOUT << "GarbageCollect2Stack inspecting: " << *Inst); - if (!info->analyze(CS, A) || PointerMayBeCaptured(Inst, true)) + if (!info->analyze(CS, A) || !A.isSafeToStackAllocate(Inst)) continue; // Let's alloca this! @@ -424,4 +420,106 @@ } +/// isSafeToStackAllocate - Return true if the GC call passed in is safe to turn +/// into a stack allocation. This requires that the return value does not +/// escape from the function and no derived pointers are live at the call site +/// (i.e. if it's in a loop then the function can't use any pointer returned +/// from an earlier call after a new call has been made) +/// +/// This is currently conservative where loops are involved: it can handle +/// simple loops, but returns false if any derived pointer is used in a +/// subsequent iteration. +/// +/// Based on LLVM's PointerMayBeCaptured(), which only does escape analysis but +/// doesn't care about loops. +bool Analysis::isSafeToStackAllocate(Instruction* Alloc) { + assert(isa(Alloc->getType()) && "Capture is for pointers only!"); + Value* V = Alloc; + + SmallVector Worklist; + SmallSet Visited; + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + Use *U = &UI.getUse(); + Visited.insert(U); + Worklist.push_back(U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast(U->getUser()); + V = U->get(); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS = CallSite::get(I); + // Not captured if the callee is readonly, doesn't return a copy through + // its return value and doesn't unwind (a readonly function can leak bits + // by throwing an exception or not depending on the input value). + if (CS.onlyReadsMemory() && CS.doesNotThrow() && + I->getType() == Type::VoidTy) + break; + + // Not captured if only passed via 'nocapture' arguments. Note that + // calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). + CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (CallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) + // The parameter is not marked 'nocapture' - captured. + return false; + // Only passed via 'nocapture' arguments, or is the called function - not + // captured. + break; + } + case Instruction::Free: + // Freeing a pointer does not cause it to be captured. + break; + case Instruction::Load: + // Loading from a pointer does not cause it to be captured. + break; + case Instruction::Store: + if (V == I->getOperand(0)) + // Stored the pointer - it may be captured. + return false; + // Storing to the pointee does not cause the pointer to be captured. + break; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + // It's not safe to stack-allocate if this derived pointer is live across + // the original allocation. + // That can only be true if it dominates the allocation. + // FIXME: To be more precise, it's also only true if it's then used after + // the original allocation instruction gets performed again, but + // how to check that? + if (!I->use_empty() && DT.dominates(I, Alloc)) + return false; + + // The original value is not captured via this if the new value isn't. + for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Use *U = &UI.getUse(); + if (Visited.insert(U)) + Worklist.push_back(U); + } + break; + default: + // Something else - be conservative and say it is captured. + return false; + } + } + + // All uses examined - not captured or live across original allocation. + return true; +} + + #endif //USE_METADATA