# HG changeset patch # User Frits van Bommel # Date 1239588548 -7200 # Node ID df2227fdc8600396c5d3778ec4482eb00900d032 # Parent 50dc0db06238168a41b0a7155fbbfc7d35fa9484 For the outermost function needing a context frame, use the address of that frame as the nest argument instead of the address of a single-element list containing only that frame address. This saves some stack space and reduces memory accesses. diff -r 50dc0db06238 -r df2227fdc860 gen/nested.cpp --- a/gen/nested.cpp Sun Apr 12 22:22:15 2009 +0200 +++ b/gen/nested.cpp Mon Apr 13 04:09:08 2009 +0200 @@ -28,6 +28,8 @@ /// Context is a list of pointers to structs. Each function with variables /// accessed by nested functions puts them in a struct, and appends a /// pointer to that struct to it's local copy of the list. + /// As an additional optimization, if the list has length one it's not + /// generated; the only element is used directly instead. NCHybrid }; @@ -102,12 +104,17 @@ return new DVarValue(astype, vd, val); } else if (nestedCtx == NCHybrid) { - FuncDeclaration *parentfunc = getParentFunc(vd); - assert(parentfunc && "No parent function for nested variable?"); + FuncDeclaration* parentfunc = getParentFunc(irfunc->decl); + assert(parentfunc && "No parent function for nested function?"); + Logger::println("Parent function: %s", parentfunc->toChars()); LLValue* val = DtoBitCast(ctx, LLPointerType::getUnqual(parentfunc->ir.irFunc->framesType)); - val = DtoGEPi(val, 0, vd->ir.irLocal->nestedDepth); - val = DtoAlignedLoad(val, (std::string(".frame.") + parentfunc->toChars()).c_str()); + Logger::cout() << "Context: " << *val << '\n'; + + if (!parentfunc->ir.irFunc->elidedCtxList) { + val = DtoGEPi(val, 0, vd->ir.irLocal->nestedDepth); + val = DtoAlignedLoad(val, (std::string(".frame.") + vdparent->toChars()).c_str()); + } val = DtoGEPi(val, 0, vd->ir.irLocal->nestedIndex, vd->toChars()); if (vd->ir.irLocal->byref) val = DtoAlignedLoad(val); @@ -123,7 +130,8 @@ Logger::println("DtoNestedInit for %s", vd->toChars()); LOG_SCOPE - LLValue* nestedVar = gIR->func()->decl->ir.irFunc->nestedVar; + IrFunction* irfunc = gIR->func()->decl->ir.irFunc; + LLValue* nestedVar = irfunc->nestedVar; if (nestedCtx == NCArray) { // alloca as usual if no value already @@ -143,12 +151,16 @@ assert(vd->ir.irLocal->value && "Nested variable without storage?"); if (!vd->isParameter() && (vd->isRef() || vd->isOut())) { Logger::println("Initializing non-parameter byref value"); - LLValue* framep = DtoGEPi(nestedVar, 0, vd->ir.irLocal->nestedDepth); - - FuncDeclaration *parentfunc = getParentFunc(vd); - assert(parentfunc && "No parent function for nested variable?"); - LLValue* frame = DtoAlignedLoad(framep, (std::string(".frame.") + parentfunc->toChars()).c_str()); - + LLValue* frame; + if (!irfunc->elidedCtxList) { + LLValue* framep = DtoGEPi(nestedVar, 0, vd->ir.irLocal->nestedDepth); + + FuncDeclaration *parentfunc = getParentFunc(vd); + assert(parentfunc && "No parent function for nested variable?"); + frame = DtoAlignedLoad(framep, (std::string(".frame.") + parentfunc->toChars()).c_str()); + } else { + frame = nestedVar; + } LLValue* slot = DtoGEPi(frame, 0, vd->ir.irLocal->nestedIndex); DtoAlignedStore(vd->ir.irLocal->value, slot); } else { @@ -293,9 +305,15 @@ if (FuncDeclaration* parfd = par->isFuncDeclaration()) { // skip functions without nested parameters if (!parfd->nestedVars.empty()) { - // Copy the types of parent function frames. const LLStructType* parft = parfd->ir.irFunc->framesType; - frametypes.insert(frametypes.begin(), parft->element_begin(), parft->element_end()); + if (parfd->ir.irFunc->elidedCtxList) { + // This is the outermost function with a nested context. + // Its context is not a list of frames, but just the frame itself. + frametypes.push_back(LLPointerType::getUnqual(parft)); + } else { + // Copy the types of parent function frames. + frametypes.insert(frametypes.begin(), parft->element_begin(), parft->element_end()); + } break; // That's all the info needed. } } else if (ClassDeclaration* parcd = par->isClassDeclaration()) { @@ -308,6 +326,13 @@ } unsigned depth = frametypes.size(); + if (Logger::enabled()) { + Logger::println("Frame types: "); + LOG_SCOPE; + for (TypeVec::iterator i=frametypes.begin(); i!=frametypes.end(); ++i) + Logger::cout() << **i << '\n'; + } + // Construct a struct for the direct nested variables of this function, and update their indices to match. // TODO: optimize ordering for minimal space usage? TypeVec types; @@ -336,24 +361,43 @@ Logger::cout() << "of type: " << *types.back() << '\n'; } } + // Append current frame type to frame type list - const LLType* frameType = LLStructType::get(types); - frametypes.push_back(LLPointerType::getUnqual(frameType)); + const LLStructType* frameType = LLStructType::get(types); + const LLStructType* nestedVarsTy = NULL; + if (!frametypes.empty()) { + assert(depth > 0); + frametypes.push_back(LLPointerType::getUnqual(frameType)); - // make struct type for nested frame list - const LLStructType* nestedVarsTy = LLStructType::get(frametypes); + // make struct type for nested frame list + nestedVarsTy = LLStructType::get(frametypes); + } else { + assert(depth == 0); + // For the outer function, just use the frame as the context + // instead of alloca'ing a single-element framelist and passing + // a pointer to that. + nestedVarsTy = frameType; + fd->ir.irFunc->elidedCtxList = true; + } + + Logger::cout() << "nestedVarsTy = " << *nestedVarsTy << '\n'; // Store type in IrFunction IrFunction* irfunction = fd->ir.irFunc; irfunction->framesType = nestedVarsTy; - // alloca it + LLValue* nestedVars = NULL; + + // Create frame for current function and append to frames list // FIXME: For D2, this should be a gc_malloc (or similar) call, not alloca - LLValue* nestedVars = DtoAlloca(nestedVarsTy, ".frame_list"); + LLValue* frame = DtoAlloca(frameType, ".frame"); // copy parent frames into beginning if (depth != 0) { + // alloca frame list first + nestedVars = DtoAlloca(nestedVarsTy, ".frame_list"); + LLValue* src = irfunction->nestArg; if (!src) { @@ -366,17 +410,25 @@ Logger::println("Indexing to 'this'"); src = DtoLoad(DtoGEPi(thisval, 0, cd->vthis->ir.irField->index, ".vthis")); } - src = DtoBitCast(src, getVoidPtrType()); - LLValue* dst = DtoBitCast(nestedVars, getVoidPtrType()); - DtoMemCpy(dst, src, DtoConstSize_t(depth * PTRSIZE), - getABITypeAlign(getVoidPtrType())); + if (depth == 1) { + // Just copy nestArg into framelist; the outer frame is not a list of pointers + // but a direct pointer. + src = DtoBitCast(src, frametypes[0]); + LLValue* gep = DtoGEPi(nestedVars, 0, 0); + DtoAlignedStore(src, gep); + } else { + src = DtoBitCast(src, getVoidPtrType()); + LLValue* dst = DtoBitCast(nestedVars, getVoidPtrType()); + DtoMemCpy(dst, src, DtoConstSize_t(depth * PTRSIZE), + getABITypeAlign(getVoidPtrType())); + } + // store current frame in list + DtoAlignedStore(frame, DtoGEPi(nestedVars, 0, depth)); + } else { + // Use frame as context directly + nestedVars = frame; } - // Create frame for current function and append to frames list - LLValue* frame = DtoAlloca(frameType, ".frame"); - // store current frame in list - DtoAlignedStore(frame, DtoGEPi(nestedVars, 0, depth)); - // store context in IrFunction irfunction->nestedVar = nestedVars; @@ -403,6 +455,10 @@ vd->ir.irLocal->byref = false; } } + } else if (FuncDeclaration* parFunc = getParentFunc(fd)) { + // Propagate context arg properties if the context arg is passed on unmodified. + fd->ir.irFunc->framesType = parFunc->ir.irFunc->framesType; + fd->ir.irFunc->elidedCtxList = parFunc->ir.irFunc->elidedCtxList; } } else { diff -r 50dc0db06238 -r df2227fdc860 ir/irfunction.cpp --- a/ir/irfunction.cpp Sun Apr 12 22:22:15 2009 +0200 +++ b/ir/irfunction.cpp Mon Apr 13 04:09:08 2009 +0200 @@ -109,6 +109,7 @@ retArg = NULL; thisArg = NULL; nestArg = NULL; + elidedCtxList = false; nestedVar = NULL; framesType = NULL; diff -r 50dc0db06238 -r df2227fdc860 ir/irfunction.h --- a/ir/irfunction.h Sun Apr 12 22:22:15 2009 +0200 +++ b/ir/irfunction.h Mon Apr 13 04:09:08 2009 +0200 @@ -46,6 +46,7 @@ llvm::Value* nestedVar; // nested var alloca const llvm::StructType* framesType; // type of nested context (not for -nested-ctx=array) + bool elidedCtxList; // whether the nested context is a raw frame instead of a list of frames (-nested-ctx=hybrid only) llvm::Value* _arguments; llvm::Value* _argptr;