comparison gen/abi-x86-64.cpp @ 1047:6bb04dbee21f

Some calling convention work for x86-64: - Implement x86-64 extern(C), hopefully correctly. - Tried to be a bit smarter about extern(D) while I was there. Interestingly, this code seems to be generating more efficient code than gcc and llvm-gcc in some edge cases, like returning a `{ [7 x i8] }` loaded from a stack slot from an extern(C) function. (gcc generates 7 1-byte loads, while this code generates a 4-byte, a 2-byte and a 1-byte load) I also added some changes to make sure structs being returned from functions or passed in as parameters are stored in memory where the rest of the backend seems to expect them to be. These should be removed when support for first-class aggregates improves.
author Frits van Bommel <fvbommel wxs.nl>
date Fri, 06 Mar 2009 16:00:47 +0100
parents
children f9333daa1bf5
comparison
equal deleted inserted replaced
1046:cc6489f32519 1047:6bb04dbee21f
1 /* TargetABI implementation for x86-64.
2 * Written for LDC by Frits van Bommel in 2009.
3 *
4 * extern(D) follows no particular external ABI, but tries to be smart about
5 * passing structs and returning them. It should probably be reviewed if the
6 * way LLVM implements fastcc on this platform ever changes.
7 * (Specifically, the number of return registers of various types is hardcoded)
8 *
9 *
10 * extern(C) implements the C calling convention for x86-64, as found in
11 * http://www.x86-64.org/documentation/abi-0.99.pdf
12 *
13 * Note:
14 * Where a discrepancy was found between llvm-gcc and the ABI documentation,
15 * llvm-gcc behavior was used for compatibility (after it was verified that
16 * regular gcc has the same behavior).
17 *
18 * LLVM gets it right for most types, but complex numbers and structs need some
19 * help. To make sure it gets those right we essentially bitcast small structs
20 * to a type to which LLVM assigns the appropriate registers, and pass that
21 * instead. Structs that are required to be passed in memory are explicitly
22 * marked with the ByVal attribute to ensure no part of them ends up in
23 * registers when only a subset of the desired registers are available.
24 *
25 * We don't perform the same transformation for D-specific types that contain
26 * multiple parts, such as dynamic arrays and delegates. They're passed as if
27 * the parts were passed as separate parameters. This helps make things like
28 * printf("%.*s", o.toString()) work as expected; if we didn't do this that
29 * wouldn't work if there were 4 other integer/pointer arguments before the
30 * toString() call because the string got bumped to memory with one integer
31 * register still free. Keeping it untransformed puts the length in a register
32 * and the pointer in memory, as printf expects it.
33 */
34
35 #include "dmd/mtype.h"
36 #include "dmd/declaration.h"
37 #include "dmd/aggregate.h"
38
39 #include "gen/llvm.h"
40 #include "gen/tollvm.h"
41 #include "gen/logger.h"
42 #include "gen/dvalue.h"
43 #include "gen/llvmhelpers.h"
44 #include "gen/abi.h"
45 #include "gen/abi-x86-64.h"
46 #include "ir/irfunction.h"
47
48 #include <cassert>
49 #include <map>
50 #include <string>
51 #include <utility>
52
53 // Implementation details for extern(C)
54 namespace {
55 /**
56 * This function helps filter out things that look like structs to C,
57 * but should be passed to C in separate arguments anyway.
58 *
59 * (e.g. dynamic arrays are passed as separate length and ptr. This
60 * is both less work and makes printf("%.*s", o.toString()) work)
61 */
62 inline bool keepUnchanged(Type* t) {
63 switch (t->ty) {
64 case Tarray: // dynamic array
65 case Taarray: // assoc array
66 case Tdelegate:
67 return true;
68
69 default:
70 return false;
71 }
72 }
73
/// Argument classes used while classifying a type for parameter passing.
enum ArgClass {
    Integer,
    Sse,
    SseUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
};

/**
 * Running classification of a type: one ArgClass per 8-byte half.
 *
 * Both halves start out as NoClass. As soon as either half becomes Memory
 * the whole classification is Memory (`isMemory` is set) and further
 * fields are ignored.
 */
struct Classification {
    bool isMemory;
    ArgClass classes[2];

    Classification() : isMemory(false) {
        classes[0] = classes[1] = NoClass;
    }

    /**
     * Merges a field of class `cl` located at byte `offset` into the
     * half it belongs to (offsets below 8 go to the first half, all
     * others to the second).
     */
    void addField(unsigned offset, ArgClass cl) {
        if (isMemory)
            return;

        // There is no need to check whether the field straddles the 8-byte
        // boundary: unaligned fields never get here, and everything large
        // enough to cross it (cdoubles, reals, structs and arrays) is
        // special-cased in classifyType().
        const int half = (offset >= 8) ? 1 : 0;

        const ArgClass merged = merge(classes[half], cl);
        if (merged == classes[half])
            return;

        classes[half] = merged;
        if (merged == Memory) {
            // Memory is contagious: it takes over the other half as well.
            classes[1 - half] = Memory;
            isMemory = true;
        }
    }

private:
    /// True for the classes that live in the x87 register stack.
    static bool isX87Family(ArgClass c) {
        return c == X87 || c == X87Up || c == ComplexX87;
    }

    /// Combines the class accumulated so far with the class of a new field.
    static ArgClass merge(ArgClass accum, ArgClass cl) {
        if (accum == cl || cl == NoClass)
            return accum;
        if (accum == NoClass)
            return cl;
        if (accum == Memory || cl == Memory)
            return Memory;
        if (accum == Integer || cl == Integer)
            return Integer;
        if (isX87Family(accum) || isX87Family(cl))
            return Memory;
        return Sse;
    }
};
126
127 void classifyType(Classification& accum, Type* ty, d_uns64 offset) {
128 if (Logger::enabled())
129 Logger::cout() << "Classifying " << ty->toChars() << " @ " << offset << '\n';
130
131 ty = ty->toBasetype();
132
133 if (ty->isintegral() || ty->ty == Tpointer) {
134 accum.addField(offset, Integer);
135 } else if (ty->ty == Tfloat80 || ty->ty == Timaginary80) {
136 accum.addField(offset, X87);
137 accum.addField(offset+8, X87Up);
138 } else if (ty->ty == Tcomplex80) {
139 accum.addField(offset, ComplexX87);
140 // make sure other half knows about it too:
141 accum.addField(offset+16, ComplexX87);
142 } else if (ty->ty == Tcomplex64) {
143 accum.addField(offset, Sse);
144 accum.addField(offset+8, Sse);
145 } else if (ty->ty == Tcomplex32) {
146 accum.addField(offset, Sse);
147 accum.addField(offset+4, Sse);
148 } else if (ty->isfloating()) {
149 accum.addField(offset, Sse);
150 } else if (ty->size() > 16 || hasUnalignedFields(ty)) {
151 // This isn't creal, yet is > 16 bytes, so pass in memory.
152 // Must be after creal case but before arrays and structs,
153 // the other types that can get bigger than 16 bytes
154 accum.addField(offset, Memory);
155 } else if (ty->ty == Tsarray) {
156 d_uns64 eltsize = ty->next->size();
157 if (eltsize > 0) {
158 d_uns64 dim = ty->size() / eltsize;
159 assert(dim <= 16
160 && "Array of non-empty type <= 16 bytes but > 16 elements?");
161 for (d_uns64 i = 0; i < dim; i++) {
162 classifyType(accum, ty->next, offset);
163 offset += eltsize;
164 }
165 }
166 } else if (ty->ty == Tstruct) {
167 Array* fields = &((TypeStruct*) ty)->sym->fields;
168 for (size_t i = 0; i < fields->dim; i++) {
169 VarDeclaration* field = (VarDeclaration*) fields->data[i];
170 classifyType(accum, field->type, offset + field->offset);
171 }
172 } else {
173 if (Logger::enabled())
174 Logger::cout() << "x86-64 ABI: Implicitly handled type: "
175 << ty->toChars() << '\n';
176 // arrays, delegates, etc. (pointer-sized fields, <= 16 bytes)
177 assert(offset == 0 || offset == 8
178 && "must be aligned and doesn't fit otherwise");
179 assert(ty->size() % 8 == 0 && "Not a multiple of pointer size?");
180
181 accum.addField(offset, Integer);
182 if (ty->size() > 8)
183 accum.addField(offset+8, Integer);
184 }
185 }
186
187 Classification classify(Type* ty) {
188 typedef std::map<Type*, Classification> ClassMap;
189 static ClassMap cache;
190
191 ClassMap::iterator it = cache.find(ty);
192 if (it != cache.end()) {
193 return it->second;
194 } else {
195 Classification cl;
196 classifyType(cl, ty, 0);
197 cache[ty] = cl;
198 return cl;
199 }
200 }
201
202 /// Returns the type to pass as, or null if no transformation is needed.
203 LLType* getAbiType(Type* ty) {
204 ty = ty->toBasetype();
205
206 // First, check if there's any need of a transformation:
207
208 if (keepUnchanged(ty))
209 return 0;
210
211 if (ty->ty != Tcomplex32 && ty->ty != Tstruct)
212 return 0; // Nothing to do,
213
214 Classification cl = classify(ty);
215 assert(!cl.isMemory);
216
217 if (cl.classes[0] == NoClass) {
218 assert(cl.classes[1] == NoClass && "Non-empty struct with empty first half?");
219 return 0; // Empty structs should also be handled correctly by LLVM
220 }
221
222 // Okay, we may need to transform. Figure out a canonical type:
223
224 std::vector<const LLType*> parts;
225
226 unsigned size = ty->size();
227
228 switch (cl.classes[0]) {
229 case Integer: {
230 unsigned bits = (size >= 8 ? 64 : (size * 8));
231 parts.push_back(LLIntegerType::get(bits));
232 break;
233 }
234
235 case Sse:
236 parts.push_back(size <= 4 ? LLType::FloatTy : LLType::DoubleTy);
237 break;
238
239 case X87:
240 assert(cl.classes[1] == X87Up && "Upper half of real not X87Up?");
241 /// The type only contains a single real/ireal field,
242 /// so just use that type.
243 return const_cast<LLType*>(LLType::X86_FP80Ty);
244
245 default:
246 assert(0 && "Unanticipated argument class");
247 }
248
249 switch(cl.classes[1]) {
250 case NoClass:
251 assert(parts.size() == 1);
252 // No need to use a single-element struct type.
253 // Just use the element type instead.
254 return const_cast<LLType*>(parts[0]);
255 break;
256
257 case Integer: {
258 assert(size > 8);
259 unsigned bits = (size - 8) * 8;
260 parts.push_back(LLIntegerType::get(bits));
261 break;
262 }
263 case Sse:
264 parts.push_back(size <= 12 ? LLType::FloatTy : LLType::DoubleTy);
265 break;
266
267 case X87Up:
268 if(cl.classes[0] == X87) {
269 // This won't happen: it was short-circuited while
270 // processing the first half.
271 } else {
272 // I can't find this anywhere in the ABI documentation,
273 // but this is what gcc does (both regular and llvm-gcc).
274 // (This triggers for types like union { real r; byte b; })
275 parts.push_back(LLType::DoubleTy);
276 }
277 break;
278
279 default:
280 assert(0 && "Unanticipated argument class for second half");
281 }
282 return LLStructType::get(parts);
283 }
284 }
285
286
287 // Implementation details for extern(D)
288 namespace x86_64_D_cc {
/// Number of registers of each kind (integer, SSE, x87).
struct DRegCount {
    unsigned ints;  ///< integer registers
    unsigned sse;   ///< SSE registers
    unsigned x87;   ///< x87 registers

    DRegCount(unsigned nInts, unsigned nSse, unsigned nX87)
        : ints(nInts)
        , sse(nSse)
        , x87(nX87)
    {}
};
297
298 // Count the number of registers needed for a simple type.
299 // (Not a struct or static array)
300 DRegCount regsNeededForSimpleType(Type* t) {
301 DRegCount r(0, 0, 0);
302 switch(t->ty) {
303 case Tstruct:
304 case Tsarray:
305 assert(0 && "Not a simple type!");
306 // Return huge numbers if assertions are disabled, so it'll always get
307 // bumped to memory.
308 r.ints = r.sse = r.x87 = (unsigned)-1;
309 break;
310
311 // Floats, doubles and such are passed in SSE registers
312 case Tfloat32:
313 case Tfloat64:
314 case Timaginary32:
315 case Timaginary64:
316 r.sse = 1;
317 break;
318
319 case Tcomplex32:
320 case Tcomplex64:
321 r.sse = 2;
322 break;
323
324 // Reals, ireals and creals are passed in x87 registers
325 case Tfloat80:
326 case Timaginary80:
327 r.x87 = 1;
328 break;
329
330 case Tcomplex80:
331 r.x87 = 2;
332 break;
333
334 // Anything else is passed in one or two integer registers,
335 // depending on its size.
336 default: {
337 int needed = (t->size() + 7) / 8;
338 assert(needed <= 2);
339 r.ints = needed;
340 break;
341 }
342 }
343 return r;
344 }
345
346 // Returns true if it's possible (and a good idea) to pass the struct in the
347 // specified number of registers.
348 // (May return false if it's a bad idea to pass the type in registers for
349 // reasons other than it not fitting)
350 // Note that if true is returned, 'left' is also modified to contain the
351 // number of registers left. This property is used in the recursive case.
352 // If false is returned, 'left' is garbage.
353 bool shouldPassStructInRegs(TypeStruct* t, DRegCount& left) {
354 // If it has unaligned fields, there's probably a reason for it,
355 // so keep it in memory.
356 if (hasUnalignedFields(t))
357 return false;
358
359 Array* fields = &t->sym->fields;
360 d_uns64 nextbyte = 0;
361 for (d_uns64 i = 0; i < fields->dim; i++) {
362 VarDeclaration* field = (VarDeclaration*) fields->data[i];
363
364 // This depends on ascending order of field offsets in structs
365 // without overlapping fields.
366 if (field->offset < nextbyte) {
367 // Don't return unions (or structs containing them) in registers.
368 return false;
369 }
370 nextbyte = field->offset + field->type->size();
371
372 switch (field->type->ty) {
373 case Tstruct:
374 if (!shouldPassStructInRegs((TypeStruct*) field->type, left))
375 return false;
376 break;
377
378 case Tsarray:
379 // Don't return static arrays in registers
380 // (indexing registers doesn't work well)
381 return false;
382
383 default: {
384 DRegCount needed = regsNeededForSimpleType(field->type);
385 if (needed.ints > left.ints || needed.sse > left.sse || needed.x87 > left.x87)
386 return false;
387 left.ints -= needed.ints;
388 left.sse -= needed.sse;
389 left.x87 -= needed.x87;
390 break;
391 }
392 }
393 }
394 return true;
395 }
396
397 // Returns true if the struct fits in return registers in the x86-64 fastcc
398 // calling convention.
399 bool retStructInRegs(TypeStruct* st) {
400 // 'fastcc' allows returns in up to two registers of each kind:
401 DRegCount state(2, 2, 2);
402 return shouldPassStructInRegs(st, state);
403 }
404
405 // Heuristic for determining whether to pass a struct type directly or
406 // bump it to memory.
407 bool passStructTypeDirectly(TypeStruct* st) {
408 // If the type fits in a reasonable number of registers,
409 // pass it directly.
410 // This does not necessarily mean it will actually be passed in
411 // registers. For example, x87 registers are never actually used for
412 // parameters.
413 DRegCount state(2, 2, 2);
414 return shouldPassStructInRegs(st, state);
415
416 // This doesn't work well: Since the register count can differ depending
417 // on backend options, there's no way to be exact anyway.
418 /*
419 // Regular fastcc: 6 int, 8 sse, 0 x87
420 // fastcc + tailcall: 5 int, 8 sse, 0 x87
421 RegCount state(5, 8, 0);
422 */
423 }
424 }
425
426 ////////////////////////////////////////////////////////////////////////////////
427 ////////////////////////////////////////////////////////////////////////////////
428 ////////////////////////////////////////////////////////////////////////////////
429 ////////////////////////////////////////////////////////////////////////////////
430
431
432 /// Just store to memory and it's readable as the other type.
433 struct X86_64_C_struct_rewrite : ABIRewrite {
434 // Get struct from ABI-mangled representation
435 LLValue* get(Type* dty, DValue* v)
436 {
437 LLValue* lval;
438 if (v->isLVal()) {
439 lval = v->getLVal();
440 } else {
441 // No memory location, create one.
442 LLValue* rval = v->getRVal();
443 lval = DtoAlloca(rval->getType());
444 DtoStore(rval, lval);
445 }
446
447 const LLType* pTy = getPtrToType(DtoType(dty));
448 return DtoLoad(DtoBitCast(lval, pTy), "get-result");
449 }
450
451 // Get struct from ABI-mangled representation, and store in the provided location.
452 void getL(Type* dty, DValue* v, llvm::Value* lval) {
453 LLValue* rval = v->getRVal();
454 const LLType* pTy = getPtrToType(rval->getType());
455 DtoStore(rval, DtoBitCast(lval, pTy));
456 }
457
458 // Turn a struct into an ABI-mangled representation
459 LLValue* put(Type* dty, DValue* v)
460 {
461 LLValue* lval;
462 if (v->isLVal()) {
463 lval = v->getLVal();
464 } else {
465 // No memory location, create one.
466 LLValue* rval = v->getRVal();
467 lval = DtoAlloca(rval->getType());
468 DtoStore(rval, lval);
469 }
470
471 LLType* abiTy = getAbiType(dty);
472 assert(abiTy && "Why are we rewriting a non-rewritten type?");
473
474 const LLType* pTy = getPtrToType(abiTy);
475 return DtoLoad(DtoBitCast(lval, pTy), "put-result");
476 }
477
478 /// should return the transformed type for this rewrite
479 const LLType* type(Type* dty, const LLType* t)
480 {
481 return getAbiType(dty);
482 }
483 };
484
485
/// Counts of integer and SSE registers, used for extern(C) parameter
/// bookkeeping. Kept a plain aggregate so it can be brace-initialized.
struct RegCount {
    unsigned char int_regs;
    unsigned char sse_regs;
};
489
490
491 struct X86_64TargetABI : TargetABI {
492 X86_64_C_struct_rewrite struct_rewrite;
493
494 void newFunctionType(TypeFunction* tf) {
495 funcTypeStack.push_back(FuncTypeData(tf->linkage));
496 }
497
498 bool returnInArg(TypeFunction* tf);
499
500 bool passByVal(Type* t);
501
502 void rewriteFunctionType(TypeFunction* tf);
503
504 void doneWithFunctionType() {
505 funcTypeStack.pop_back();
506 }
507
508 private:
509 struct FuncTypeData {
510 LINK linkage; // Linkage of the function type currently under construction
511 RegCount state; // bookkeeping for extern(C) parameter registers
512
513 FuncTypeData(LINK linkage_)
514 : linkage(linkage_)
515 {
516 state.int_regs = 6;
517 state.sse_regs = 8;
518 }
519 };
520 std::vector<FuncTypeData> funcTypeStack;
521
522 LINK linkage() {
523 assert(funcTypeStack.size() != 0);
524 return funcTypeStack.back().linkage;
525 }
526
527 RegCount& state() {
528 assert(funcTypeStack.size() != 0);
529 return funcTypeStack.back().state;
530 }
531
532 void fixup(IrFuncTyArg& arg);
533 };
534
535
536 // The public getter for abi.cpp
537 TargetABI* getX86_64TargetABI() {
538 return new X86_64TargetABI;
539 }
540
541
542 bool X86_64TargetABI::returnInArg(TypeFunction* tf) {
543 assert(linkage() == tf->linkage);
544 Type* rt = tf->next->toBasetype();
545
546 if (tf->linkage == LINKd) {
547 assert(rt->ty != Tsarray && "Update calling convention for static array returns");
548
549 // All non-structs can be returned in registers.
550 if (rt->ty != Tstruct)
551 return false;
552
553 // Try to figure out whether the struct fits in return registers
554 // and whether it's a good idea to put it there.
555 return !x86_64_D_cc::retStructInRegs((TypeStruct*) rt);
556 } else {
557 if (rt == Type::tvoid || keepUnchanged(rt))
558 return false;
559
560 Classification cl = classify(rt);
561 return cl.isMemory;
562 }
563 }
564
565 bool X86_64TargetABI::passByVal(Type* t) {
566 if (linkage() == LINKd) {
567 if (t->ty != Tstruct)
568 return false;
569
570 // Try to be smart about which structs are passed in memory.
571 return !x86_64_D_cc::passStructTypeDirectly((TypeStruct*) t);
572 } else {
573 // This implements the C calling convention for x86-64.
574 // It might not be correct for other calling conventions.
575 Classification cl = classify(t);
576 if (cl.isMemory)
577 return true;
578
579 // Figure out how many registers we want for this arg:
580 RegCount wanted = { 0, 0 };
581 for (int i = 0 ; i < 2; i++) {
582 if (cl.classes[i] == Integer)
583 wanted.int_regs++;
584 else if (cl.classes[i] == Sse)
585 wanted.sse_regs++;
586 }
587
588 // See if they're available:
589 RegCount& state = this->state();
590 if (wanted.int_regs <= state.int_regs && wanted.sse_regs <= state.sse_regs) {
591 state.int_regs -= wanted.int_regs;
592 state.sse_regs -= wanted.sse_regs;
593 } else {
594 if (keepUnchanged(t)) {
595 // Not enough registers available, but this is passed as if it's
596 // multiple arguments. Just use the registers there are,
597 // automatically spilling the rest to memory.
598 if (wanted.int_regs > state.int_regs)
599 state.int_regs = 0;
600 else
601 state.int_regs -= wanted.int_regs;
602
603 if (wanted.sse_regs > state.sse_regs)
604 state.sse_regs = 0;
605 else
606 state.sse_regs -= wanted.sse_regs;
607 } else if (t->iscomplex() || t->ty == Tstruct) {
608 // Spill entirely to memory, even if some of the registers are
609 // available.
610
611 // FIXME: Don't do this if *none* of the wanted registers are available,
612 // (i.e. only when absolutely necessary for abi-compliance)
613 // so it gets alloca'd by the callee and -scalarrepl can
614 // more easily break it up?
615 // Note: this won't be necessary if the following LLVM bug gets fixed:
616 // http://llvm.org/bugs/show_bug.cgi?id=3741
617 return true;
618 } else {
619 assert(t == Type::tfloat80 || t == Type::timaginary80 || t->size() < 8
620 && "What other big types are there?"); // other than static arrays...
621 // In any case, they shouldn't be represented as structs in LLVM:
622 assert(!isaStruct(DtoType(t)));
623 }
624 }
625 // Everything else that's passed in memory is handled by LLVM.
626 return false;
627 }
628 }
629
630 // Helper function for rewriteFunctionType.
631 // Return type and parameters are passed here (unless they're already in memory)
632 // to get the rewrite applied (if necessary).
633 void X86_64TargetABI::fixup(IrFuncTyArg& arg) {
634 LLType* abiTy = getAbiType(arg.type);
635
636 if (abiTy && abiTy != arg.ltype) {
637 assert(arg.type == Type::tcomplex32 || arg.type->ty == Tstruct);
638 arg.ltype = abiTy;
639 arg.rewrite = &struct_rewrite;
640 }
641 }
642
643 void X86_64TargetABI::rewriteFunctionType(TypeFunction* tf) {
644 // extern(D) is handled entirely by passByVal and returnInArg
645
646 if (tf->linkage != LINKd) {
647 // TODO: See if this is correct for more than just extern(C).
648
649 IrFuncTy* fty = tf->fty;
650
651 if (!fty->arg_sret) {
652 Logger::println("x86-64 ABI: Transforming return type");
653 Type* rt = fty->ret->type->toBasetype();
654 if (rt != Type::tvoid)
655 fixup(*fty->ret);
656 }
657
658
659 Logger::println("x86-64 ABI: Transforming arguments");
660 LOG_SCOPE;
661
662 for (IrFuncTy::ArgIter I = fty->args.begin(), E = fty->args.end(); I != E; ++I) {
663 IrFuncTyArg& arg = **I;
664
665 if (Logger::enabled())
666 Logger::cout() << "Arg: " << arg.type->toChars() << '\n';
667
668 // Arguments that are in memory are of no interest to us.
669 if (arg.byref)
670 continue;
671
672 Type* ty = arg.type->toBasetype();
673
674 fixup(arg);
675
676 if (Logger::enabled())
677 Logger::cout() << "New arg type: " << *arg.ltype << '\n';
678 }
679 }
680 }