annotate run/a/asm_rsqrtps_01_A.d @ 1630:d0efa3ae5522 default tip

run/mini/naked_asm5: New x86_64 ABI passes the arguments in reverse order.
author David Nadlinger <code@klickverbot.at>
date Sat, 23 Apr 2011 22:57:32 +0200
parents 236f0d02b8b9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1040
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
1 // $HeadURL$
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
2 // $Date$
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
3 // $Author$
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
4
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
5 module dstress.run.a.asm_rsqrtps_01_A;
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
6
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
// Set the `runTest` version identifier when either D_InlineAsm_X86 or
// D_InlineAsm_X86_64 is defined; the test body below is gated on `runTest`.
7 version(D_InlineAsm_X86){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
8 version = runTest; // 32-bit x86 inline assembler case
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
9 }else version(D_InlineAsm_X86_64){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
10 version = runTest; // x86-64 inline assembler case
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
11 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
12
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
// Run-time test of the `rsqrtps` instruction via inline assembler.
// With `runTest` set, main() feeds {-1, 1, 4, -4} through rsqrtps and checks
// each result lane against a tolerance; otherwise the else branch prints the
// DSTRESS XFAIL message and stops compilation.
13 version(runTest){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
14 import addon.cpuinfo;
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
15
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
16 int main(){ // returns 0 when no check fires
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
17 haveSSE!()(); // presumably the SSE availability check from addon.cpuinfo - TODO confirm
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
18
1265
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
19 float* a = (new float[4]).ptr; // a = {1, 2, 3, 4}; only used to preload XMM0
1085
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
20 a[0] = 1.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
21 a[1] = 2.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
22 a[2] = 3.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
23 a[3] = 4.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
24
1265
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
25 float* b = (new float[4]).ptr; // b = {-1, 1, 4, -4}; source operand of rsqrtps
1085
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
26 b[0] = -1.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
27 b[1] = 1.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
28 b[2] = 4.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
29 b[3] = -4.0f;
98f634622849 -fPIC fixes
thomask
parents: 1044
diff changeset
30
1265
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
31 float* c = (new float[4]).ptr; // result buffer; the asm block stores XMM0 here
1040
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
32
1265
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
33 static if(size_t.sizeof == 4){ // 4-byte size_t: addresses go through EAX
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
34 asm{
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
35 mov EAX, a;
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
36 movups XMM0, [EAX]; // XMM0 = a[0..4] (unaligned load)
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
37 mov EAX, b;
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
38 movups XMM1, [EAX]; // XMM1 = b[0..4]
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
39 rsqrtps XMM0, XMM1; // instruction under test: XMM0 = approximate 1/sqrt of each float in XMM1
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
40 mov EAX, c;
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
41 movups [EAX], XMM0; // c[0..4] = result
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
42 }
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
43 }else static if(size_t.sizeof == 8){ // 8-byte size_t: same sequence using RAX
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
44 asm{
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
45 mov RAX, a;
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
46 movups XMM0, [RAX]; // XMM0 = a[0..4] (unaligned load)
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
47 mov RAX, b;
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
48 movups XMM1, [RAX]; // XMM1 = b[0..4]
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
49 rsqrtps XMM0, XMM1; // instruction under test: XMM0 = approximate 1/sqrt of each float in XMM1
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
50 mov RAX, c;
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
51 movups [RAX], XMM0; // c[0..4] = result
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
52 }
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
53 }else{
236f0d02b8b9 32<->64 iasm fixes
thomask
parents: 1085
diff changeset
54 static assert(0, "unhandled pointer size"); // any other size_t width is a compile error
1040
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
55 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
56
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
57 c[0] += 1.0f; // lane 0 (input -1.0f): form |c[0] + 1| and compare with 1/4096
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
58 if(c[0] < 0.0f){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
59 c[0] = -c[0];
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
60 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
61 if(c[0] > 1.0f / 4096.0f){ // NOTE(review): false for NaN, so a NaN result for the negative input passes - confirm intended
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
62 assert(0);
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
63 }
1044
03c97933de98 inline ASM review
thomask
parents: 1040
diff changeset
64
1040
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
65 c[1] -= 1.0f; // lane 1 (input 1.0f): fail if |c[1] - 1| exceeds 1/4096
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
66 if(c[1] < 0.0f){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
67 c[1] = -c[1];
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
68 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
69 if(c[1] > 1.0f / 4096.0f){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
70 assert(0);
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
71 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
72
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
73 c[2] -= 0.5f; // lane 2 (input 4.0f): fail if |c[2] - 0.5| exceeds 0.25/4096
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
74 if(c[2] < 0.0f){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
75 c[2] = -c[2];
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
76 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
77 if(c[2] > 0.25f / 4096.0f){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
78 assert(0);
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
79 }
1044
03c97933de98 inline ASM review
thomask
parents: 1040
diff changeset
80
1040
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
81 c[3] += 0.5f; // lane 3 (input -4.0f): form |c[3] + 0.5| and compare with 0.25/4096
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
82 if(c[3] < 0.0f){
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
83 c[3] = -c[3];
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
84 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
85 if(c[3] > 0.25f / 4096.0f){ // NOTE(review): as for lane 0, a NaN result never triggers this check
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
86 assert(0);
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
87 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
88
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
89 return 0; // all four lane checks passed
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
90 }
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
91 }else{ // runTest not set: neither inline-assembler version identifier was defined
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
92 pragma(msg, "DSTRESS{XFAIL}: no inline ASM support");
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
93 static assert(0); // force a compile-time failure after printing the message
a3d6bbc8dec0 inline ASM review
thomask
parents:
diff changeset
94 }