Mercurial > projects > ldc
annotate druntime/src/compiler/dmd/arrayfloat.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
rev | line source |
---|---|
1458
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2 * Contains SSE2 and MMX versions of certain operations for float. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
3 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
4 * Copyright: Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
5 * License: <a href="http://www.boost.org/LICENSE_1_0.txt>Boost License 1.0</a>. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
6 * Authors: Walter Bright, based on code originally written by Burton Radons |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
7 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
8 * Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
9 * Distributed under the Boost Software License, Version 1.0. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
10 * (See accompanying file LICENSE_1_0.txt or copy at |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
11 * http://www.boost.org/LICENSE_1_0.txt) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
12 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
13 module rt.arrayfloat; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
14 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
15 private import rt.util.cpuid; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
16 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
17 version (unittest) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
18 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
19 private import core.stdc.stdio : printf; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
20 /* This is so unit tests will test every CPU variant |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
21 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
22 int cpuid; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
23 const int CPUID_MAX = 5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
24 bool mmx() { return cpuid == 1 && rt.util.cpuid.mmx(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
25 bool sse() { return cpuid == 2 && rt.util.cpuid.sse(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
26 bool sse2() { return cpuid == 3 && rt.util.cpuid.sse2(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
27 bool amd3dnow() { return cpuid == 4 && rt.util.cpuid.amd3dnow(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
28 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
29 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
30 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
31 alias rt.util.cpuid.mmx mmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
32 alias rt.util.cpuid.sse sse; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
33 alias rt.util.cpuid.sse2 sse2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
34 alias rt.util.cpuid.amd3dnow amd3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
35 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
36 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
37 //version = log; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
38 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
39 bool disjoint(T)(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
40 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
41 return (a.ptr + a.length <= b.ptr || b.ptr + b.length <= a.ptr); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
42 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
43 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
44 alias float T; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
45 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
46 extern (C): |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
47 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
48 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
49 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
50 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
51 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
52 * a[] = b[] + c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
53 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
54 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
55 T[] _arraySliceSliceAddSliceAssign_f(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
56 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
57 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
58 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
59 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
60 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
61 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
62 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
63 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
64 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
65 //printf("_arraySliceSliceAddSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
66 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
67 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
68 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
69 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
70 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
71 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
72 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
73 // SSE version is 834% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
74 if (sse() && b.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
75 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
76 version (log) printf("\tsse unaligned\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
77 auto n = aptr + (b.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
78 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
79 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
80 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
81 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
82 mov EAX, bptr; // left operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
83 mov ECX, cptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
84 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
85 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
86 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
87 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
88 startsseloopb: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
89 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
90 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
91 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
92 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
93 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
94 movups XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
95 movups XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
96 movups XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
97 movups XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
98 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
99 addps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
100 addps XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
101 addps XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
102 addps XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
103 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
104 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
105 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
106 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
107 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
108 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
109 jb startsseloopb; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
110 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
111 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
112 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
113 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
114 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
115 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
116 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
117 // 3DNow! version is only 13% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
118 if (amd3dnow() && b.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
119 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
120 version (log) printf("\tamd3dnow\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
121 auto n = aptr + (b.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
122 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
123 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
124 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
125 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
126 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
127 mov EAX, bptr; // left operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
128 mov ECX, cptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
129 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
130 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
131 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
132 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
133 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
134 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
135 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
136 pfadd MM0, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
137 pfadd MM1, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
138 pfadd MM2, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
139 pfadd MM3, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
140 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
141 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
142 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
143 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
144 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
145 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
146 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
147 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
148 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
149 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
150 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
151 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
152 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
153 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
154 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
155 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
156 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
157 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
158 // Handle remainder |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
159 version (log) if (aptr < aend) printf("\tbase\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
160 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
161 *aptr++ = *bptr++ + *cptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
162 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
163 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
164 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
165 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
166 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
167 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
168 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
169 printf("_arraySliceSliceAddSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
170 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
171 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
172 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
173 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
174 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
175 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
176 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
177 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
178 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
179 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
180 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
181 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
182 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
183 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
184 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
185 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
186 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
187 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
188 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
189 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
190 c[] = a[] + b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
191 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
192 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
193 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
194 if (c[i] != cast(T)(a[i] + b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
195 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
196 printf("[%d]: %g != %g + %g\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
197 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
198 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
199 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
200 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
201 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
202 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
203 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
204 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
205 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
206 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
207 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
208 * a[] = b[] - c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
209 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
210 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
211 T[] _arraySliceSliceMinSliceAssign_f(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
212 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
213 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
214 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
215 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
216 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
217 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
218 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
219 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
220 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
221 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
222 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
223 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
224 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
225 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
226 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
227 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
228 // SSE version is 834% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
229 if (sse() && b.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
230 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
231 auto n = aptr + (b.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
232 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
233 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
234 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
235 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
236 mov EAX, bptr; // left operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
237 mov ECX, cptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
238 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
239 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
240 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
241 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
242 startsseloopb: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
243 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
244 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
245 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
246 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
247 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
248 movups XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
249 movups XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
250 movups XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
251 movups XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
252 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
253 subps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
254 subps XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
255 subps XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
256 subps XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
257 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
258 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
259 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
260 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
261 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
262 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
263 jb startsseloopb; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
264 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
265 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
266 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
267 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
268 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
269 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
270 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
271 // 3DNow! version is only 13% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
272 if (amd3dnow() && b.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
273 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
274 auto n = aptr + (b.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
275 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
276 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
277 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
278 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
279 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
280 mov EAX, bptr; // left operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
281 mov ECX, cptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
282 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
283 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
284 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
285 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
286 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
287 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
288 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
289 pfsub MM0, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
290 pfsub MM1, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
291 pfsub MM2, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
292 pfsub MM3, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
293 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
294 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
295 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
296 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
297 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
298 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
299 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
300 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
301 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
302 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
303 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
304 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
305 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
306 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
307 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
308 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
309 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
310 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
311 // Handle remainder |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
312 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
313 *aptr++ = *bptr++ - *cptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
314 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
315 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
316 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
317 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
318 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
319 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
320 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
321 printf("_arraySliceSliceMinSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
322 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
323 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
324 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
325 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
326 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
327 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
328 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
329 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
330 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
331 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
332 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
333 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
334 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
335 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
336 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
337 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
338 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
339 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
340 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
341 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
342 c[] = a[] - b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
343 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
344 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
345 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
346 if (c[i] != cast(T)(a[i] - b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
347 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
348 printf("[%d]: %g != %gd - %g\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
349 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
350 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
351 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
352 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
353 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
354 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
355 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
356 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
357 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
358 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
359 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
360 * a[] = b[] + value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
361 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
362 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
363 T[] _arraySliceExpAddSliceAssign_f(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
364 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
365 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
366 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
367 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
368 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
369 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
370 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
371 //printf("_arraySliceExpAddSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
372 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
373 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
374 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
375 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
376 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
377 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
378 // SSE version is 665% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
379 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
380 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
381 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
382 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
383 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
384 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
385 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
386 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
387 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
388 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
389 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
390 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
391 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
392 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
393 startsseloop: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
394 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
395 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
396 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
397 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
398 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
399 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
400 addps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
401 addps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
402 addps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
403 addps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
404 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
405 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
406 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
407 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
408 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
409 jb startsseloop; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
410 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
411 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
412 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
413 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
414 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
415 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
416 // 3DNow! version is 69% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
417 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
418 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
419 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
420 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
421 ulong w = *cast(uint *) &value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
422 ulong v = w | (w << 32L); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
423 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
424 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
425 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
426 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
427 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
428 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
429 movq MM4, qword ptr [v]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
430 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
431 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
432 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
433 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
434 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
435 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
436 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
437 pfadd MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
438 pfadd MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
439 pfadd MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
440 pfadd MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
441 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
442 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
443 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
444 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
445 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
446 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
447 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
448 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
449 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
450 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
451 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
452 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
453 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
454 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
455 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
456 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
457 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
458 *aptr++ = *bptr++ + value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
459 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
460 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
461 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
462 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
463 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
464 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
465 printf("_arraySliceExpAddSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
466 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
467 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
468 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
469 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
470 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
471 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
472 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
473 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
474 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
475 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
476 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
477 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
478 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
479 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
480 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
481 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
482 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
483 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
484 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
485 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
486 c[] = a[] + 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
487 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
488 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
489 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
490 if (c[i] != cast(T)(a[i] + 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
491 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
492 printf("[%d]: %g != %g + 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
493 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
494 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
495 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
496 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
497 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
498 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
499 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
500 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
501 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
502 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
503 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
504 * a[] += value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
505 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
506 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
507 T[] _arrayExpSliceAddass_f(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
508 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
509 //printf("_arrayExpSliceAddass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
510 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
511 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
512 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
513 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
514 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
515 // SSE version is 302% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
516 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
517 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
518 // align pointer |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
519 auto n = cast(T*)((cast(uint)aptr + 15) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
520 while (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
521 *aptr++ += value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
522 n = cast(T*)((cast(uint)aend) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
523 if (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
524 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
525 // Aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
526 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
527 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
528 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
529 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
530 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
531 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
532 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
533 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
534 startsseloopa: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
535 movaps XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
536 movaps XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
537 movaps XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
538 movaps XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
539 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
540 addps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
541 addps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
542 addps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
543 addps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
544 movaps [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
545 movaps [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
546 movaps [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
547 movaps [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
548 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
549 jb startsseloopa; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
550 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
551 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
552 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
553 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
554 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
555 // 3DNow! version is 63% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
556 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
557 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
558 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
559 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
560 ulong w = *cast(uint *) &value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
561 ulong v = w | (w << 32L); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
562 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
563 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
564 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
565 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
566 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
567 movq MM4, qword ptr [v]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
568 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
569 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
570 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
571 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
572 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
573 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
574 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
575 pfadd MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
576 pfadd MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
577 pfadd MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
578 pfadd MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
579 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
580 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
581 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
582 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
583 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
584 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
585 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
586 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
587 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
588 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
589 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
590 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
591 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
592 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
593 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
594 *aptr++ += value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
595 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
596 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
597 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
598 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
599 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
600 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
601 printf("_arrayExpSliceAddass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
602 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
603 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
604 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
605 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
606 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
607 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
608 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
609 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
610 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
611 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
612 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
613 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
614 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
615 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
616 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
617 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
618 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
619 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
620 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
621 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
622 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
623 c[] += 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
624 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
625 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
626 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
627 if (c[i] != cast(T)(a[i] + 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
628 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
629 printf("[%d]: %g != %g + 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
630 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
631 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
632 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
633 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
634 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
635 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
636 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
637 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
638 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
639 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
640 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
641 * a[] += b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
642 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
643 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
644 T[] _arraySliceSliceAddass_f(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
645 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
646 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
647 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
648 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
649 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
650 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
651 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
652 //printf("_arraySliceSliceAddass_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
653 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
654 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
655 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
656 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
657 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
658 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
659 // SSE version is 468% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
660 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
661 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
662 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
663 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
664 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
665 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
666 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
667 mov ECX, bptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
668 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
669 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
670 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
671 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
672 startsseloopb: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
673 movups XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
674 movups XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
675 movups XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
676 movups XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
677 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
678 movups XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
679 movups XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
680 movups XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
681 movups XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
682 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
683 addps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
684 addps XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
685 addps XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
686 addps XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
687 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
688 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
689 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
690 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
691 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
692 jb startsseloopb; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
693 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
694 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
695 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
696 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
697 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
698 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
699 // 3DNow! version is 57% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
700 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
701 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
702 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
703 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
704 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
705 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
706 mov ESI, dword ptr [aptr]; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
707 mov EDI, dword ptr [n]; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
708 mov ECX, dword ptr [bptr]; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
709 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
710 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
711 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
712 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
713 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
714 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
715 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
716 pfadd MM0, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
717 pfadd MM1, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
718 pfadd MM2, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
719 pfadd MM3, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
720 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
721 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
722 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
723 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
724 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
725 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
726 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
727 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
728 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
729 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
730 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
731 mov dword ptr [bptr], ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
732 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
733 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
734 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
735 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
736 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
737 *aptr++ += *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
738 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
739 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
740 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
741 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
742 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
743 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
744 printf("_arraySliceSliceAddass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
745 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
746 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
747 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
748 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
749 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
750 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
751 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
752 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
753 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
754 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
755 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
756 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
757 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
758 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
759 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
760 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
761 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
762 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
763 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
764 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
765 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
766 c[] += b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
767 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
768 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
769 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
770 if (c[i] != cast(T)(a[i] + b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
771 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
772 printf("[%d]: %g != %g + %g\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
773 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
774 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
775 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
776 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
777 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
778 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
779 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
780 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
781 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
782 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
783 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
784 * a[] = b[] - value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
785 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
786 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
787 T[] _arraySliceExpMinSliceAssign_f(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
788 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
789 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
790 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
791 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
792 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
793 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
794 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
795 //printf("_arraySliceExpMinSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
796 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
797 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
798 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
799 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
800 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
801 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
802 // SSE version is 622% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
803 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
804 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
805 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
806 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
807 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
808 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
809 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
810 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
811 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
812 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
813 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
814 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
815 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
816 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
817 startsseloop: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
818 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
819 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
820 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
821 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
822 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
823 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
824 subps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
825 subps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
826 subps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
827 subps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
828 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
829 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
830 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
831 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
832 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
833 jb startsseloop; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
834 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
835 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
836 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
837 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
838 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
839 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
840 // 3DNow! version is 67% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
841 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
842 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
843 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
844 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
845 T[2] w; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
846 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
847 w[0] = w[1] = value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
848 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
849 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
850 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
851 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
852 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
853 mov EAX, dword ptr [bptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
854 movq MM4, qword ptr [w]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
855 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
856 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
857 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
858 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
859 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
860 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
861 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
862 pfsub MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
863 pfsub MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
864 pfsub MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
865 pfsub MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
866 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
867 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
868 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
869 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
870 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
871 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
872 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
873 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
874 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
875 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
876 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
877 mov dword ptr [bptr], EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
878 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
879 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
880 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
881 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
882 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
883 *aptr++ = *bptr++ - value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
884 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
885 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
886 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
887 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
888 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
889 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
890 printf("_arraySliceExpMinSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
891 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
892 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
893 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
894 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
895 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
896 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
897 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
898 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
899 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
900 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
901 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
902 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
903 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
904 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
905 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
906 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
907 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
908 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
909 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
910 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
911 c[] = a[] - 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
912 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
913 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
914 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
915 if (c[i] != cast(T)(a[i] - 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
916 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
917 printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
918 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
919 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
920 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
921 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
922 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
923 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
924 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
925 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
926 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
927 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
928 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
929 * a[] = value - b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
930 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
931 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
932 T[] _arrayExpSliceMinSliceAssign_f(T[] a, T[] b, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
933 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
934 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
935 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
936 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
937 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
938 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
939 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
940 //printf("_arrayExpSliceMinSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
941 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
942 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
943 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
944 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
945 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
946 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
947 // SSE version is 690% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
948 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
949 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
950 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
951 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
952 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
953 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
954 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
955 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
956 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
957 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
958 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
959 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
960 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
961 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
962 startsseloop: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
963 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
964 movaps XMM5, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
965 movaps XMM6, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
966 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
967 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
968 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
969 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
970 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
971 subps XMM5, XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
972 subps XMM6, XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
973 movups [ESI+ 0-64], XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
974 movups [ESI+16-64], XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
975 movaps XMM5, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
976 movaps XMM6, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
977 subps XMM5, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
978 subps XMM6, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
979 movups [ESI+32-64], XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
980 movups [ESI+48-64], XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
981 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
982 jb startsseloop; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
983 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
984 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
985 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
986 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
987 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
988 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
989 // 3DNow! version is 67% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
990 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
991 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
992 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
993 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
994 ulong w = *cast(uint *) &value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
995 ulong v = w | (w << 32L); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
996 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
997 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
998 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
999 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1000 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1001 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1002 movq MM4, qword ptr [v]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1003 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1004 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1005 start3dnow: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1006 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1007 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1008 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1009 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1010 pfsubr MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1011 pfsubr MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1012 pfsubr MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1013 pfsubr MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1014 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1015 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1016 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1017 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1018 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1019 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1020 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1021 jb start3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1022 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1023 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1024 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1025 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1026 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1027 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1028 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1029 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1030 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1031 *aptr++ = value - *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1032 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1033 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1034 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1035 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1036 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1037 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1038 printf("_arrayExpSliceMinSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1039 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1040 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1041 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1042 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1043 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1044 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1045 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1046 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1047 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1048 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1049 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1050 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1051 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1052 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1053 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1054 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1055 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1056 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1057 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1058 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1059 c[] = 6 - a[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1060 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1061 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1062 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1063 if (c[i] != cast(T)(6 - a[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1064 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1065 printf("[%d]: %g != 6 - %g\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1066 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1067 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1068 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1069 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1070 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1071 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1072 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1073 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1074 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1075 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1076 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1077 * a[] -= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1078 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1079 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1080 T[] _arrayExpSliceMinass_f(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1081 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1082 //printf("_arrayExpSliceMinass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1083 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1084 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1085 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1086 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1087 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1088 // SSE version is 304% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1089 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1090 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1091 // align pointer |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1092 auto n = cast(T*)((cast(uint)aptr + 15) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1093 while (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1094 *aptr++ -= value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1095 n = cast(T*)((cast(uint)aend) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1096 if (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1097 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1098 // Aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1099 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1100 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1101 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1102 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1103 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1104 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1105 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1106 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1107 startsseloopa: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1108 movaps XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1109 movaps XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1110 movaps XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1111 movaps XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1112 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1113 subps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1114 subps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1115 subps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1116 subps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1117 movaps [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1118 movaps [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1119 movaps [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1120 movaps [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1121 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1122 jb startsseloopa; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1123 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1124 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1125 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1126 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1127 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1128 // 3DNow! version is 63% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1129 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1130 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1131 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1132 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1133 ulong w = *cast(uint *) &value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1134 ulong v = w | (w << 32L); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1135 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1136 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1137 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1138 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1139 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1140 movq MM4, qword ptr [v]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1141 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1142 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1143 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1144 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1145 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1146 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1147 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1148 pfsub MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1149 pfsub MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1150 pfsub MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1151 pfsub MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1152 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1153 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1154 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1155 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1156 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1157 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1158 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1159 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1160 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1161 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1162 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1163 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1164 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1165 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1166 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1167 *aptr++ -= value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1168 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1169 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1170 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1171 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1172 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1173 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1174 printf("_arrayExpSliceminass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1175 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1176 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1177 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1178 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1179 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1180 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1181 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1182 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1183 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1184 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1185 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1186 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1187 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1188 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1189 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1190 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1191 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1192 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1193 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1194 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1195 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1196 c[] -= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1197 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1198 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1199 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1200 if (c[i] != cast(T)(a[i] - 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1201 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1202 printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1203 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1204 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1205 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1206 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1207 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1208 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1209 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1210 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1211 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1212 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1213 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1214 * a[] -= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1215 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1216 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1217 T[] _arraySliceSliceMinass_f(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1218 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1219 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1220 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1221 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1222 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1223 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1224 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1225 //printf("_arraySliceSliceMinass_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1226 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1227 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1228 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1229 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1230 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1231 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1232 // SSE version is 468% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1233 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1234 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1235 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1236 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1237 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1238 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1239 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1240 mov ECX, bptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1241 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1242 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1243 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1244 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1245 startsseloopb: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1246 movups XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1247 movups XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1248 movups XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1249 movups XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1250 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1251 movups XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1252 movups XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1253 movups XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1254 movups XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1255 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1256 subps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1257 subps XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1258 subps XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1259 subps XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1260 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1261 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1262 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1263 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1264 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1265 jb startsseloopb; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1266 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1267 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1268 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1269 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1270 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1271 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1272 // 3DNow! version is 57% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1273 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1274 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1275 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1276 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1277 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1278 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1279 mov ESI, dword ptr [aptr]; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1280 mov EDI, dword ptr [n]; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1281 mov ECX, dword ptr [bptr]; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1282 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1283 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1284 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1285 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1286 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1287 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1288 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1289 pfsub MM0, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1290 pfsub MM1, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1291 pfsub MM2, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1292 pfsub MM3, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1293 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1294 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1295 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1296 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1297 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1298 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1299 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1300 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1301 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1302 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1303 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1304 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1305 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1306 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1307 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1308 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1309 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1310 *aptr++ -= *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1311 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1312 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1313 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1314 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1315 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1316 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1317 printf("_arrayExpSliceMinass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1318 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1319 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1320 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1321 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1322 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1323 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1324 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1325 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1326 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1327 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1328 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1329 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1330 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1331 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1332 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1333 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1334 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1335 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1336 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1337 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1338 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1339 c[] -= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1340 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1341 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1342 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1343 if (c[i] != cast(T)(a[i] - 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1344 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1345 printf("[%d]: %g != %g - 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1346 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1347 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1348 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1349 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1350 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1351 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1352 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1353 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1354 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1355 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1356 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1357 * a[] = b[] * value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1358 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1359 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1360 T[] _arraySliceExpMulSliceAssign_f(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1361 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1362 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1363 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1364 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1365 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1366 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1367 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1368 //printf("_arraySliceExpMulSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1369 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1370 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1371 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1372 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1373 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1374 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1375 // SSE version is 607% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1376 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1377 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1378 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1379 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1380 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1381 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1382 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1383 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1384 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1385 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1386 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1387 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1388 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1389 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1390 startsseloop: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1391 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1392 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1393 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1394 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1395 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1396 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1397 mulps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1398 mulps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1399 mulps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1400 mulps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1401 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1402 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1403 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1404 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1405 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1406 jb startsseloop; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1407 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1408 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1409 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1410 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1411 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1412 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1413 // 3DNow! version is 69% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1414 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1415 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1416 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1417 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1418 ulong w = *cast(uint *) &value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1419 ulong v = w | (w << 32L); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1420 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1421 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1422 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1423 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1424 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1425 mov EAX, dword ptr [bptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1426 movq MM4, qword ptr [v]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1427 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1428 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1429 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1430 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1431 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1432 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1433 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1434 pfmul MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1435 pfmul MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1436 pfmul MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1437 pfmul MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1438 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1439 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1440 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1441 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1442 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1443 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1444 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1445 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1446 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1447 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1448 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1449 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1450 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1451 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1452 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1453 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1454 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1455 *aptr++ = *bptr++ * value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1456 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1457 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1458 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1459 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1460 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1461 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1462 printf("_arraySliceExpMulSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1463 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1464 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1465 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1466 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1467 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1468 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1469 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1470 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1471 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1472 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1473 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1474 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1475 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1476 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1477 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1478 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1479 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1480 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1481 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1482 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1483 c[] = a[] * 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1484 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1485 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1486 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1487 if (c[i] != cast(T)(a[i] * 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1488 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1489 printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1490 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1491 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1492 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1493 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1494 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1495 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1496 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1497 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1498 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1499 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1500 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1501 * a[] = b[] * c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1502 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1503 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1504 T[] _arraySliceSliceMulSliceAssign_f(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1505 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1506 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1507 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1508 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1509 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1510 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1511 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1512 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1513 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1514 //printf("_arraySliceSliceMulSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1515 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1516 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1517 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1518 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1519 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1520 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1521 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1522 // SSE version is 833% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1523 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1524 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1525 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1526 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1527 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1528 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1529 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1530 mov EAX, bptr; // left operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1531 mov ECX, cptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1532 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1533 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1534 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1535 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1536 startsseloopb: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1537 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1538 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1539 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1540 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1541 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1542 movups XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1543 movups XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1544 movups XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1545 movups XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1546 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1547 mulps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1548 mulps XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1549 mulps XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1550 mulps XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1551 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1552 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1553 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1554 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1555 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1556 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1557 jb startsseloopb; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1558 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1559 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1560 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1561 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1562 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1563 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1564 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1565 // 3DNow! version is only 13% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1566 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1567 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1568 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1569 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1570 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1571 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1572 mov ESI, dword ptr [aptr]; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1573 mov EDI, dword ptr [n]; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1574 mov EAX, dword ptr [bptr]; // left operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1575 mov ECX, dword ptr [cptr]; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1576 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1577 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1578 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1579 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1580 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1581 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1582 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1583 pfmul MM0, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1584 pfmul MM1, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1585 pfmul MM2, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1586 pfmul MM3, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1587 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1588 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1589 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1590 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1591 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1592 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1593 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1594 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1595 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1596 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1597 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1598 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1599 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1600 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1601 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1602 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1603 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1604 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1605 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1606 *aptr++ = *bptr++ * *cptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1607 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1608 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1609 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1610 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1611 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1612 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1613 printf("_arraySliceSliceMulSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1614 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1615 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1616 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1617 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1618 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1619 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1620 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1621 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1622 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1623 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1624 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1625 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1626 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1627 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1628 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1629 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1630 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1631 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1632 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1633 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1634 c[] = a[] * b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1635 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1636 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1637 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1638 if (c[i] != cast(T)(a[i] * b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1639 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1640 printf("[%d]: %g != %g * %g\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1641 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1642 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1643 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1644 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1645 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1646 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1647 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1648 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1649 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1650 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1651 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1652 * a[] *= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1653 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1654 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1655 T[] _arrayExpSliceMulass_f(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1656 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1657 //printf("_arrayExpSliceMulass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1658 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1659 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1660 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1661 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1662 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1663 // SSE version is 303% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1664 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1665 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1666 // align pointer |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1667 auto n = cast(T*)((cast(uint)aptr + 15) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1668 while (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1669 *aptr++ *= value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1670 n = cast(T*)((cast(uint)aend) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1671 if (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1672 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1673 // Aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1674 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1675 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1676 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1677 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1678 movss XMM4, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1679 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1680 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1681 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1682 startsseloopa: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1683 movaps XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1684 movaps XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1685 movaps XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1686 movaps XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1687 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1688 mulps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1689 mulps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1690 mulps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1691 mulps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1692 movaps [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1693 movaps [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1694 movaps [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1695 movaps [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1696 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1697 jb startsseloopa; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1698 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1699 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1700 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1701 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1702 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1703 // 3DNow! version is 63% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1704 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1705 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1706 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1707 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1708 ulong w = *cast(uint *) &value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1709 ulong v = w | (w << 32L); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1710 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1711 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1712 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1713 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1714 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1715 movq MM4, qword ptr [v]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1716 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1717 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1718 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1719 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1720 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1721 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1722 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1723 pfmul MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1724 pfmul MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1725 pfmul MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1726 pfmul MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1727 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1728 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1729 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1730 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1731 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1732 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1733 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1734 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1735 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1736 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1737 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1738 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1739 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1740 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1741 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1742 *aptr++ *= value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1743 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1744 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1745 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1746 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1747 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1748 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1749 printf("_arrayExpSliceMulass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1750 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1751 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1752 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1753 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1754 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1755 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1756 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1757 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1758 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1759 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1760 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1761 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1762 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1763 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1764 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1765 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1766 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1767 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1768 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1769 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1770 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1771 c[] *= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1772 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1773 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1774 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1775 if (c[i] != cast(T)(a[i] * 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1776 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1777 printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1778 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1779 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1780 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1781 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1782 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1783 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1784 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1785 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1786 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1787 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1788 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1789 * a[] *= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1790 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1791 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1792 T[] _arraySliceSliceMulass_f(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1793 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1794 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1795 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1796 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1797 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1798 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1799 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1800 //printf("_arraySliceSliceMulass_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1801 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1802 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1803 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1804 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1805 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1806 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1807 // SSE version is 525% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1808 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1809 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1810 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1811 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1812 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1813 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1814 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1815 mov ECX, bptr; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1816 mov ESI, aptr; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1817 mov EDI, n; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1818 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1819 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1820 startsseloopb: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1821 movups XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1822 movups XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1823 movups XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1824 movups XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1825 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1826 movups XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1827 movups XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1828 movups XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1829 movups XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1830 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1831 mulps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1832 mulps XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1833 mulps XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1834 mulps XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1835 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1836 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1837 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1838 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1839 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1840 jb startsseloopb; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1841 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1842 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1843 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1844 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1845 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1846 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1847 // 3DNow! version is 57% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1848 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1849 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1850 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1851 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1852 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1853 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1854 mov ESI, dword ptr [aptr]; // destination operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1855 mov EDI, dword ptr [n]; // end comparison |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1856 mov ECX, dword ptr [bptr]; // right operand |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1857 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1858 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1859 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1860 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1861 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1862 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1863 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1864 pfmul MM0, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1865 pfmul MM1, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1866 pfmul MM2, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1867 pfmul MM3, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1868 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1869 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1870 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1871 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1872 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1873 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1874 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1875 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1876 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1877 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1878 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1879 mov dword ptr [bptr], ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1880 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1881 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1882 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1883 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1884 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1885 *aptr++ *= *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1886 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1887 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1888 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1889 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1890 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1891 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1892 printf("_arrayExpSliceMulass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1893 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1894 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1895 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1896 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1897 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1898 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1899 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1900 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1901 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1902 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1903 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1904 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1905 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1906 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1907 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1908 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1909 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1910 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1911 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1912 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1913 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1914 c[] *= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1915 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1916 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1917 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1918 if (c[i] != cast(T)(a[i] * 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1919 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1920 printf("[%d]: %g != %g * 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1921 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1922 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1923 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1924 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1925 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1926 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1927 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1928 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1929 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1930 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1931 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1932 * a[] = b[] / value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1933 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1934 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1935 T[] _arraySliceExpDivSliceAssign_f(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1936 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1937 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1938 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1939 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1940 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1941 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1942 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1943 //printf("_arraySliceExpDivSliceAssign_f()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1944 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1945 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1946 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1947 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1948 /* Multiplying by the reciprocal is faster, but does |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1949 * not produce as accurate an answer. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1950 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1951 T recip = cast(T)1 / value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1952 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1953 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1954 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1955 // SSE version is 587% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1956 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1957 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1958 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1959 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1960 // Unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1961 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1962 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1963 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1964 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1965 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1966 movss XMM4, recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1967 //movss XMM4, value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1968 //rcpss XMM4, XMM4 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1969 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1970 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1971 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1972 startsseloop: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1973 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1974 movups XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1975 movups XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1976 movups XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1977 movups XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1978 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1979 mulps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1980 mulps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1981 mulps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1982 mulps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1983 //divps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1984 //divps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1985 //divps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1986 //divps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1987 movups [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1988 movups [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1989 movups [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1990 movups [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1991 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1992 jb startsseloop; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1993 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1994 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1995 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1996 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1997 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1998 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1999 // 3DNow! version is 72% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2000 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2001 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2002 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2003 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2004 T[2] w = void; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2005 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2006 w[0] = recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2007 w[1] = recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2008 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2009 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2010 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2011 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2012 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2013 mov EAX, dword ptr [bptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2014 movq MM4, qword ptr [w]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2015 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2016 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2017 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2018 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2019 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2020 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2021 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2022 pfmul MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2023 pfmul MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2024 pfmul MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2025 pfmul MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2026 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2027 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2028 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2029 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2030 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2031 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2032 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2033 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2034 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2035 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2036 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2037 mov dword ptr [bptr], EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2038 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2039 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2040 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2041 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2042 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2043 *aptr++ = *bptr++ * recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2044 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2045 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2046 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2047 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2048 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2049 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2050 printf("_arraySliceExpDivSliceAssign_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2051 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2052 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2053 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2054 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2055 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2056 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2057 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2058 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2059 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2060 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2061 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2062 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2063 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2064 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2065 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2066 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2067 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2068 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2069 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2070 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2071 c[] = a[] / 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2072 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2073 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2074 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2075 if (c[i] != cast(T)(a[i] / 8)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2076 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2077 printf("[%d]: %g != %g / 8\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2078 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2079 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2080 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2081 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2082 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2083 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2084 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2085 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2086 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2087 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2088 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2089 * a[] /= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2090 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2091 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2092 T[] _arrayExpSliceDivass_f(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2093 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2094 //printf("_arrayExpSliceDivass_f(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2095 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2096 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2097 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2098 /* Multiplying by the reciprocal is faster, but does |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2099 * not produce as accurate an answer. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2100 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2101 T recip = cast(T)1 / value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2102 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2103 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2104 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2105 // SSE version is 245% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2106 if (sse() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2107 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2108 // align pointer |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2109 auto n = cast(T*)((cast(uint)aptr + 15) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2110 while (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2111 *aptr++ *= recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2112 n = cast(T*)((cast(uint)aend) & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2113 if (aptr < n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2114 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2115 // Aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2116 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2117 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2118 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2119 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2120 movss XMM4, recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2121 //movss XMM4, value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2122 //rcpss XMM4, XMM4 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2123 shufps XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2124 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2125 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2126 startsseloopa: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2127 movaps XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2128 movaps XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2129 movaps XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2130 movaps XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2131 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2132 mulps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2133 mulps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2134 mulps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2135 mulps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2136 //divps XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2137 //divps XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2138 //divps XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2139 //divps XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2140 movaps [ESI+ 0-64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2141 movaps [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2142 movaps [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2143 movaps [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2144 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2145 jb startsseloopa; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2146 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2147 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2148 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2149 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2150 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2151 // 3DNow! version is 57% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2152 if (amd3dnow() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2153 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2154 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2155 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2156 T[2] w = void; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2157 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2158 w[0] = w[1] = recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2159 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2160 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2161 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2162 mov ESI, dword ptr [aptr]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2163 mov EDI, dword ptr [n]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2164 movq MM4, qword ptr [w]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2165 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2166 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2167 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2168 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2169 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2170 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2171 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2172 pfmul MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2173 pfmul MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2174 pfmul MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2175 pfmul MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2176 movq [ESI], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2177 movq [ESI+8], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2178 movq [ESI+16], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2179 movq [ESI+24], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2180 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2181 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2182 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2183 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2184 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2185 mov dword ptr [aptr], ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2186 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2187 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2188 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2189 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2190 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2191 *aptr++ *= recip; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2192 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2193 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2194 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2195 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2196 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2197 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2198 printf("_arrayExpSliceDivass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2199 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2200 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2201 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2202 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2203 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2204 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2205 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2206 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2207 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2208 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2209 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2210 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2211 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2212 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2213 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2214 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2215 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2216 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2217 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2218 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2219 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2220 c[] /= 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2221 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2222 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2223 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2224 if (c[i] != cast(T)(a[i] / 8)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2225 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2226 printf("[%d]: %g != %g / 8\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2227 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2228 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2229 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2230 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2231 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2232 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2233 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2234 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2235 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2236 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2237 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2238 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2239 * a[] -= b[] * value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2240 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2241 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2242 T[] _arraySliceExpMulSliceMinass_f(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2243 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2244 return _arraySliceExpMulSliceAddass_f(a, -value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2245 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2246 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2247 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2248 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2249 * a[] += b[] * value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2250 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2251 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2252 T[] _arraySliceExpMulSliceAddass_f(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2253 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2254 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2255 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2256 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2257 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2258 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2259 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2260 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2261 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2262 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2263 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2264 // Handle remainder |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2265 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2266 *aptr++ += *bptr++ * value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2267 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2268 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2269 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2270 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2271 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2272 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2273 printf("_arraySliceExpMulSliceAddass_f unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2274 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2275 cpuid = 1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2276 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2277 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2278 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2279 for (int j = 0; j < 1; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2280 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2281 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2282 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2283 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2284 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2285 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2286 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2287 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2288 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2289 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2290 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2291 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2292 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2293 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2294 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2295 b[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2296 c[] += a[] * 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2297 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2298 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2299 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2300 //printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2301 if (c[i] != cast(T)(b[i] + a[i] * 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2302 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2303 printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2304 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2305 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2306 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2307 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2308 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2309 } |