Mercurial > projects > ldc
annotate druntime/src/compiler/dmd/arrayshort.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
rev | line source |
---|---|
1458
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2 * Contains SSE2 and MMX versions of certain operations for wchar, short, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
3 * and ushort ('u', 's' and 't' suffixes). |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
4 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
5 * Copyright: Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
6 * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
7 * Authors: Walter Bright, based on code originally written by Burton Radons |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
8 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
9 * Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
10 * Distributed under the Boost Software License, Version 1.0. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
11 * (See accompanying file LICENSE_1_0.txt or copy at |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
12 * http://www.boost.org/LICENSE_1_0.txt) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
13 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
14 module rt.arrayshort; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
15 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
16 private import rt.util.cpuid; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
17 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
version (unittest)
{
    private import core.stdc.stdio : printf;

    /* Test-only shims so the unit tests can exercise every CPU variant.
     *
     * `cpuid` selects which single predicate is allowed to report true
     * (1 = mmx, 2 = sse, 3 = sse2, 4 = amd3dnow); the selected predicate
     * still defers to rt.util.cpuid, so a path is only taken when the
     * hardware really supports it. With cpuid == 0 every predicate is false.
     */
    int cpuid;
    // NOTE(review): test loops written as `cpuid < CPUID_MAX` stop at 3 and
    // never select the amd3dnow variant (4) -- confirm whether that is intended.
    const int CPUID_MAX = 4;
    bool mmx()      { return cpuid == 1 && rt.util.cpuid.mmx(); }
    bool sse()      { return cpuid == 2 && rt.util.cpuid.sse(); }
    bool sse2()     { return cpuid == 3 && rt.util.cpuid.sse2(); }
    bool amd3dnow() { return cpuid == 4 && rt.util.cpuid.amd3dnow(); }
}
else
{
    // Normal builds query the hardware directly. The same four names are
    // exposed as in the unittest branch so both configurations agree.
    alias rt.util.cpuid.mmx mmx;
    alias rt.util.cpuid.sse sse;
    alias rt.util.cpuid.sse2 sse2;
    alias rt.util.cpuid.amd3dnow amd3dnow;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
37 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
38 //version = log; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
39 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Reports whether two slices refer to non-overlapping memory.
 *
 * Returns: true when `a` ends at or before the start of `b`, or `b` ends
 * at or before the start of `a`; false otherwise.
 */
bool disjoint(T)(T[] a, T[] b)
{
    auto aStop = a.ptr + a.length;  // one past the last element of a
    auto bStop = b.ptr + b.length;  // one past the last element of b

    if (aStop <= b.ptr)
        return true;
    return bStop <= a.ptr;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
44 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
45 alias short T; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
46 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
47 extern (C): |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
48 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
49 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
50 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
51 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
52 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
53 * a[] = b[] + value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
54 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
55 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * 'u'-suffixed entry point for a[] = b[] + value (the module header lists
 * the 'u' suffix as the wchar variant). Forwards unchanged to the 's'
 * (short) implementation and returns its result.
 */
T[] _arraySliceExpAddSliceAssign_u(T[] a, T value, T[] b)
{
    auto result = _arraySliceExpAddSliceAssign_s(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
60 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * 't'-suffixed entry point for a[] = b[] + value (the module header lists
 * the 't' suffix as the ushort variant). Forwards unchanged to the 's'
 * (short) implementation and returns its result.
 */
T[] _arraySliceExpAddSliceAssign_t(T[] a, T value, T[] b)
{
    auto result = _arraySliceExpAddSliceAssign_s(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
65 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] = b[] + value
 * element-wise on 16-bit elements.
 *
 * Params:
 *      a     = destination slice; must have the same length as b and must
 *              not overlap it (both checked by the in-contract)
 *      value = addend applied to every element
 *      b     = source slice
 * Returns: a
 *
 * When compiled with D_InlineAsm_X86, the bulk of the array is processed
 * with SSE2 (16 elements per iteration) or MMX (8 elements per iteration)
 * if the CPU supports it; any remaining elements -- or the whole array when
 * no vector path applies -- are handled by the scalar loop at the end.
 */
T[] _arraySliceExpAddSliceAssign_s(T[] a, T value, T[] b)
in
{
    assert(a.length == b.length);
    assert(disjoint(a, b));
}
body
{
    //printf("_arraySliceExpAddSliceAssign_s()\n");
    auto aptr = a.ptr;
    auto aend = aptr + a.length;
    auto bptr = b.ptr;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 3343% faster
        if (sse2() && a.length >= 16)
        {
            // End of the part of a[] that is a whole multiple of 16 elements.
            auto n = aptr + (a.length & ~15);

            // Replicate the 16-bit addend into both halves of a dword;
            // pshufd below then broadcasts that dword to all four lanes.
            uint l = cast(ushort) value;
            l |= (l << 16);

            if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0)
            {
                asm // unaligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov EAX, bptr;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startaddsse2u:
                    add ESI, 32;
                    movdqu XMM0, [EAX];
                    movdqu XMM1, [EAX+16];
                    add EAX, 32;
                    paddw XMM0, XMM2;
                    paddw XMM1, XMM2;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startaddsse2u;

                    // Publish progress so the scalar loop finishes the tail.
                    mov aptr, ESI;
                    mov bptr, EAX;
                }
            }
            else
            {
                asm // aligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov EAX, bptr;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startaddsse2a:
                    add ESI, 32;
                    movdqa XMM0, [EAX];
                    movdqa XMM1, [EAX+16];
                    add EAX, 32;
                    paddw XMM0, XMM2;
                    paddw XMM1, XMM2;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startaddsse2a;

                    // Publish progress so the scalar loop finishes the tail.
                    mov aptr, ESI;
                    mov bptr, EAX;
                }
            }
        }
        else
        // MMX version is 3343% faster
        if (mmx() && a.length >= 8)
        {
            // End of the part of a[] that is a whole multiple of 8 elements.
            auto n = aptr + (a.length & ~7);

            // Replicate the 16-bit addend into both halves of a dword so the
            // broadcast below needs only baseline MMX instructions.
            uint l = cast(ushort) value;
            l |= (l << 16);

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                mov EAX, bptr;
                movd MM2, l;
                // Copy the low dword into the high dword: four copies of the
                // addend. (pshufw is not used here because it is an SSE /
                // extended-MMX instruction and this path is guarded only by
                // mmx().)
                punpckldq MM2, MM2;

                align 4;
            startmmx:
                add ESI, 16;
                movq MM0, [EAX];
                movq MM1, [EAX+8];
                add EAX, 16;
                paddw MM0, MM2;
                paddw MM1, MM2;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                emms;   // leave MMX state so x87 code can run afterwards
                mov aptr, ESI;
                mov bptr, EAX;
            }
        }
    }

    // Scalar loop: handles the tail left by a vector path, or everything
    // when no vector path was taken.
    while (aptr < aend)
        *aptr++ = cast(T)(*bptr++ + value);

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
185 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
186 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
187 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
188 printf("_arraySliceExpAddSliceAssign_s unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
189 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
190 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
191 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
192 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
193 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
194 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
195 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
196 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
197 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
198 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
199 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
200 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
201 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
202 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
203 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
204 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
205 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
206 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
207 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
208 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
209 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
210 c[] = a[] + 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
211 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
212 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
213 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
214 if (c[i] != cast(T)(a[i] + 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
215 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
216 printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
217 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
218 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
219 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
220 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
221 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
222 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
223 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
224 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
225 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
226 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
227 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
228 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
229 * a[] = b[] + c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
230 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
231 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
232 T[] _arraySliceSliceAddSliceAssign_u(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
233 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
234 return _arraySliceSliceAddSliceAssign_s(a, c, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
235 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
236 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
237 T[] _arraySliceSliceAddSliceAssign_t(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
238 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
239 return _arraySliceSliceAddSliceAssign_s(a, c, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
240 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
241 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
242 T[] _arraySliceSliceAddSliceAssign_s(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
243 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
244 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
245 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
246 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
247 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
248 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
249 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
250 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
251 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
252 //printf("_arraySliceSliceAddSliceAssign_s()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
253 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
254 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
255 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
256 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
257 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
258 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
259 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
260 // SSE2 aligned version is 3777% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
261 if (sse2() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
262 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
263 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
264 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
265 if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
266 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
267 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
268 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
269 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
270 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
271 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
272 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
273 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
274 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
275 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
276 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
277 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
278 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
279 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
280 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
281 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
282 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
283 paddw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
284 paddw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
285 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
286 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
287 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
288 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
289 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
290 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
291 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
292 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
293 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
294 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
295 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
296 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
297 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
298 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
299 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
300 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
301 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
302 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
303 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
304 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
305 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
306 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
307 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
308 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
309 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
310 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
311 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
312 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
313 paddw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
314 paddw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
315 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
316 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
317 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
318 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
319 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
320 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
321 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
322 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
323 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
324 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
325 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
326 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
327 // MMX version is 2068% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
328 if (mmx() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
329 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
330 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
331 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
332 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
333 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
334 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
335 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
336 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
337 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
338 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
339 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
340 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
341 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
342 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
343 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
344 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
345 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
346 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
347 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
348 paddw MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
349 paddw MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
350 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
351 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
352 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
353 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
354 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
355 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
356 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
357 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
358 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
359 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
360 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
361 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
362 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
363 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
364 *aptr++ = cast(T)(*bptr++ + *cptr++); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
365 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
366 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
367 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
368 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
369 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
370 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
371 printf("_arraySliceSliceAddSliceAssign_s unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
372 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
373 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
374 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
375 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
376 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
377 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
378 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
379 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
380 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
381 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
382 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
383 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
384 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
385 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
386 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
387 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
388 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
389 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
390 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
391 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
392 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
393 c[] = a[] + b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
394 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
395 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
396 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
397 if (c[i] != cast(T)(a[i] + b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
398 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
399 printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
400 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
401 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
402 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
403 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
404 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
405 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
406 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
407 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
408 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
409 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
410 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
411 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
412 * a[] += value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
413 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
414 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
T[] _arrayExpSliceAddass_u(T[] a, T value)
{
    // 'u' is the wchar-suffixed entry point (see the module header).
    // It performs no work of its own: the request is handed unchanged to
    // the 's' (short) implementation and that call's result is returned.
    auto updated = _arrayExpSliceAddass_s(a, value);
    return updated;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
419 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * ushort ('t' suffix) entry point for a[] += value.
 *
 * Delegates directly to _arrayExpSliceAddass_s and returns whatever that
 * call returns.
 */
T[] _arrayExpSliceAddass_t(T[] a, T value)
{
    auto updated = _arrayExpSliceAddass_s(a, value);
    return updated;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
424 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Adds value to every element of a in place (a[] += value) and returns a.
 *
 * On x86 with inline assembly available, the bulk of the array is processed
 * with packed 16-bit adds (paddw): 16 elements per iteration with SSE2, or
 * 8 elements per iteration with MMX. Whatever is left over (and the whole
 * array when neither SIMD path is taken) is handled by a scalar loop.
 *
 * Params:
 *  a     = slice to update in place
 *  value = addend applied to each element
 *
 * Returns: the slice a that was passed in.
 *
 * NOTE(review): the element math assumes T is a 16-bit integer type (the
 * module header says wchar/short/ushort) - T itself is declared elsewhere.
 */
T[] _arrayExpSliceAddass_s(T[] a, T value)
{
    //printf("_arrayExpSliceAddass_s(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
    auto aptr = a.ptr;
    auto aend = aptr + a.length;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 832% faster
        if (sse2() && a.length >= 16)
        {
            // End of the region covered by whole 16-element iterations.
            auto n = aptr + (a.length & ~15);

            // Two copies of the 16-bit addend packed into one dword;
            // pshufd below replicates that dword across the XMM register.
            uint l = cast(ushort) value;
            l |= (l << 16);

            if (((cast(uint) aptr) & 15) != 0)
            {
                asm // unaligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startaddsse2u:
                    movdqu XMM0, [ESI];
                    movdqu XMM1, [ESI+16];
                    add ESI, 32;
                    paddw XMM0, XMM2;
                    paddw XMM1, XMM2;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startaddsse2u;

                    // Hand the position back so the scalar loop does the tail.
                    mov aptr, ESI;
                }
            }
            else
            {
                asm // aligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startaddsse2a:
                    movdqa XMM0, [ESI];
                    movdqa XMM1, [ESI+16];
                    add ESI, 32;
                    paddw XMM0, XMM2;
                    paddw XMM1, XMM2;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startaddsse2a;

                    // Hand the position back so the scalar loop does the tail.
                    mov aptr, ESI;
                }
            }
        }
        else
        // MMX version is 826% faster
        if (mmx() && a.length >= 8)
        {
            // End of the region covered by whole 8-element iterations.
            auto n = aptr + (a.length & ~7);

            // Pack two copies of the addend into one dword, as in the SSE2
            // path, so the broadcast needs only baseline MMX instructions.
            uint l = cast(ushort) value;
            l |= (l << 16);

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                movd MM2, l;
                // Duplicate the low dword into the high dword, giving four
                // copies of the addend. pshufw is deliberately avoided: it
                // is an SSE / extended-MMX instruction and this branch is
                // guarded by mmx() only, so it could fault on MMX-only CPUs.
                punpckldq MM2, MM2;

                align 4;
            startmmx:
                movq MM0, [ESI];
                movq MM1, [ESI+8];
                add ESI, 16;
                paddw MM0, MM2;
                paddw MM1, MM2;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                // Leave MMX state so later x87 floating point code works.
                emms;
                mov aptr, ESI;
            }
        }
    }

    // Scalar path: remaining tail elements, or the whole array when no
    // SIMD branch above ran.
    while (aptr < aend)
        *aptr++ += value;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
528 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
unittest
{
    printf("_arrayExpSliceAddass_s unittest\n");

    // cpuid and CPUID_MAX are declared outside this block; presumably each
    // cpuid value selects a different implementation path - confirm there.
    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // shift == 0 uses the arrays as allocated; shift == 1 drops the
        // first element so the slices start one element further in.
        foreach (int shift; 0 .. 2)
        {
            const int dim = 67;
            int total = dim + shift;

            T[] a = (new T[total])[shift .. total];
            T[] b = (new T[total])[shift .. total];
            T[] c = (new T[total])[shift .. total];

            foreach (int k; 0 .. dim)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            // Operation under test: copy c into a, then add a constant.
            a[] = c[];
            a[] += 6;

            foreach (int k; 0 .. dim)
            {
                if (a[k] == cast(T)(c[k] + 6))
                    continue;

                printf("[%d]: %d != %d + 6\n", k, a[k], c[k]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
567 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
568 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
569 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
570 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
571 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
572 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
573 * a[] += b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
574 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
575 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
T[] _arraySliceSliceAddass_u(T[] a, T[] b)
{
    // 'u' is the wchar-suffixed entry point (see the module header).
    // Both slices are passed through untouched to the 's' (short)
    // implementation, whose result is returned as-is.
    auto updated = _arraySliceSliceAddass_s(a, b);
    return updated;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
580 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * ushort ('t' suffix) entry point for a[] += b[].
 *
 * Delegates directly to _arraySliceSliceAddass_s and returns whatever that
 * call returns.
 */
T[] _arraySliceSliceAddass_t(T[] a, T[] b)
{
    auto updated = _arraySliceSliceAddass_s(a, b);
    return updated;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
585 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// In-place element-wise addition: a[i] += b[i] for every element; returns a.
// The `in` contract asserts that both slices have the same length and that
// disjoint(a, b) holds (disjoint is defined outside this block).
// When D_InlineAsm_X86 is set, a vectorised prefix is processed with SSE2 or
// MMX `paddw` (packed 16-bit add); whatever is left is handled by the scalar
// loop at the end.
// NOTE(review): T is declared outside this block; the file header says this
// module targets wchar/short/ushort, i.e. 16-bit elements - confirm.
586 T[] _arraySliceSliceAddass_s(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
587 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
588 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
589 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
590 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
591 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
592 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
593 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
594 //printf("_arraySliceSliceAddass_s()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// aptr/bptr are running cursors into a and b; aend is one past a's last element.
595 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
596 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
597 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
598 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
599 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
600 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
601 // SSE2 aligned version is 2085% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
602 if (sse2() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
603 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// n = end of the prefix whose element count is a multiple of 16; the SSE2
// loops below stop when ESI reaches it.
604 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
605 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// If either pointer has any of its low 4 bits set (not 16-byte aligned), take
// the movdqu path; only when both are aligned is the movdqa path used.
606 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
607 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
608 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
609 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Register roles: ESI = cursor in a, EDI = stop address n, ECX = cursor in b.
610 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
611 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
612 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
613 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
614 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Each iteration loads 32 bytes from a and 32 bytes from b, adds them with
// paddw, and stores the sums back into a.
615 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
616 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
617 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
618 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
619 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
620 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
621 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
622 paddw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
623 paddw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// ESI was already advanced by 32 above, hence the -32 displacement on the stores.
624 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
625 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
626 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
627 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
628 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Write the advanced cursors back so the scalar loop resumes after the
// vectorised prefix.
629 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
630 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
631 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
632 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
633 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
634 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Same loop as the unaligned case, but using movdqa for loads and stores.
635 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
636 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
637 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
638 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
639 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
640 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
641 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
642 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
643 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
644 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
645 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
646 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
647 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
648 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
649 paddw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
650 paddw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
651 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
652 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
653 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
654 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
655 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
656 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
657 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
658 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
659 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
660 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
661 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
662 // MMX version is 1022% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
663 if (mmx() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
664 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// n = end of the prefix whose element count is a multiple of 8; the MMX loop
// below stops when ESI reaches it.
665 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
666 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
667 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
668 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
669 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
670 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
671 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
672 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
673 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Each iteration processes 16 bytes of a and of b using two 8-byte movq
// loads per array; stores use a -16 displacement because ESI is advanced first.
674 start: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
675 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
676 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
677 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
678 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
679 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
680 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
681 paddw MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
682 paddw MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
683 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
684 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
685 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
686 jb start; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
687 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// emms resets the MMX state (the MM registers alias the x87 FPU registers)
// before control returns to ordinary code.
688 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
689 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
690 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
691 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
692 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
693 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
694 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Scalar loop: covers the whole array when no vector path ran, otherwise only
// the elements left between the updated aptr and aend.
695 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
696 *aptr++ += *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
697 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
698 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
699 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
700 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Unit test for the slice `+=` operation: performs c[] += a[] and checks every
// element against b[i] + a[i], where b holds a copy of c taken before the add.
// NOTE(review): `c[] += a[]` is presumably lowered by the compiler to one of
// the _arraySliceSliceAddass_* functions - confirm.
701 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
702 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
703 printf("_arraySliceSliceAddass_s unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
704 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// cpuid and CPUID_MAX are declared outside this block; presumably each cpuid
// value makes sse2()/mmx() report a different feature set - TODO confirm.
705 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
706 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
707 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
708 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// j = 0 uses the arrays as allocated; j = 1 slices off the first element so
// the data pointers are shifted by one element.
709 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
710 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 67 is not a multiple of 8 or 16, so a vectorised prefix is always followed
// by a non-empty scalar tail.
711 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
712 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
713 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
714 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
715 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
716 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
717 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
718 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Fill the inputs with distinct, index-derived values.
719 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
720 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
721 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
722 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
723 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
724 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Snapshot c into b (overwriting the i + 7 values) so the original c is
// available for the check, then do the in-place add under test.
725 b[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
726 c[] += a[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
727 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Verify element by element; print the mismatch and fail on the first one.
728 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
729 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
730 if (c[i] != cast(T)(b[i] + a[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
731 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
732 printf("[%d]: %d != %d + %d\n", i, c[i], b[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
733 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
734 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
735 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
736 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
737 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
738 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
739 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
740 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
741 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
742 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
743 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
744 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
745 * a[] = b[] - value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
746 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
747 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 'u'-suffixed entry point for a[] = b[] - value (the file header associates
// the 'u' suffix with wchar). It forwards its arguments unchanged to the
// 's' implementation and returns that function's result.
748 T[] _arraySliceExpMinSliceAssign_u(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
749 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
750 return _arraySliceExpMinSliceAssign_s(a, value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
751 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
752 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 't'-suffixed entry point for a[] = b[] - value (the file header associates
// the 't' suffix with ushort). It forwards its arguments unchanged to the
// 's' implementation and returns that function's result.
753 T[] _arraySliceExpMinSliceAssign_t(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
754 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
755 return _arraySliceExpMinSliceAssign_s(a, value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
756 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
757 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
758 T[] _arraySliceExpMinSliceAssign_s(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
759 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
760 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
761 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
762 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
763 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
764 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
765 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
766 //printf("_arraySliceExpMinSliceAssign_s()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
767 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
768 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
769 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
770 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
771 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
772 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
773 // SSE2 aligned version is 3695% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
774 if (sse2() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
775 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
776 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
777 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
778 uint l = cast(ushort) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
779 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
780 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
781 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
782 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
783 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
784 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
785 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
786 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
787 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
788 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
789 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
790 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
791 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
792 startaddsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
793 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
794 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
795 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
796 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
797 psubw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
798 psubw XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
799 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
800 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
801 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
802 jb startaddsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
803 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
804 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
805 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
806 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
807 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
808 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
809 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
810 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
811 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
812 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
813 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
814 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
815 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
816 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
817 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
818 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
819 startaddsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
820 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
821 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
822 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
823 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
824 psubw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
825 psubw XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
826 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
827 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
828 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
829 jb startaddsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
830 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
831 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
832 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
833 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
834 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
835 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
836 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
837 // MMX version is 3049% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
838 if (mmx() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
839 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
840 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
841 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
842 uint l = cast(ushort) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
843 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
844 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
845 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
846 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
847 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
848 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
849 movd MM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
850 pshufw MM2, MM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
851 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
852 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
853 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
854 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
855 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
856 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
857 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
858 psubw MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
859 psubw MM1, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
860 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
861 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
862 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
863 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
864 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
865 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
866 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
867 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
868 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
869 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
870 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
871 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
872 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
873 *aptr++ = cast(T)(*bptr++ - value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
874 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
875 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
876 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
877 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Checks c[] = a[] - 6 element by element, for each cpuid value in
// [0, CPUID_MAX) and for two slice placements: starting at the beginning of
// the allocation, and starting one element in.
unittest
{
    printf("_arraySliceExpMinSliceAssign_s unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // shift == 0: slices begin where the allocation begins.
        // shift == 1: slices begin one element later, to misalign them.
        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;
            const int total = dim + shift;

            // Allocate `total` elements, then keep the last `dim` of them.
            T[] a = (new T[total])[shift .. total];
            T[] b = (new T[total])[shift .. total];
            T[] c = (new T[total])[shift .. total];

            // Fill the inputs with known values.
            int i = 0;
            while (i < dim)
            {
                a[i] = cast(T)i;
                b[i] = cast(T)(i + 7);
                c[i] = cast(T)(i * 2);
                i++;
            }

            // Operation under test.
            c[] = a[] - 6;

            // Compare every element against the scalar expression.
            for (int k = 0; k < dim; k++)
            {
                if (c[k] == cast(T)(a[k] - 6))
                    continue;

                printf("[%d]: %d != %d - 6\n", k, c[k], a[k]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
915 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
916 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
917 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
918 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
919 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
920 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
921 * a[] = value - b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
922 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
923 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 'u'-suffixed entry point for a[] = value - b[].
 *
 * Passes its arguments unchanged to _arrayExpSliceMinSliceAssign_s and
 * returns that call's result.
 */
T[] _arrayExpSliceMinSliceAssign_u(T[] a, T[] b, T value)
{
    T[] result = _arrayExpSliceMinSliceAssign_s(a, b, value);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
928 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 't'-suffixed entry point for a[] = value - b[].
 *
 * Passes its arguments unchanged to _arrayExpSliceMinSliceAssign_s and
 * returns that call's result.
 */
T[] _arrayExpSliceMinSliceAssign_t(T[] a, T[] b, T value)
{
    T[] result = _arrayExpSliceMinSliceAssign_s(a, b, value);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
933 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
934 T[] _arrayExpSliceMinSliceAssign_s(T[] a, T[] b, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
935 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
936 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
937 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
938 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
939 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
940 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
941 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
942 //printf("_arrayExpSliceMinSliceAssign_s()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
943 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
944 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
945 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
946 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
947 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
948 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
949 // SSE2 aligned version is 4995% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
950 if (sse2() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
951 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
952 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
953 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
954 uint l = cast(ushort) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
955 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
956 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
957 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
958 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
959 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
960 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
961 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
962 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
963 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
964 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
965 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
966 startaddsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
967 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
968 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
969 movd XMM3, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
970 pshufd XMM3, XMM3, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
971 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
972 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
973 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
974 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
975 psubw XMM2, XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
976 psubw XMM3, XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
977 movdqu [ESI -32], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
978 movdqu [ESI+16-32], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
979 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
980 jb startaddsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
981 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
982 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
983 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
984 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
985 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
986 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
987 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
988 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
989 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
990 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
991 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
992 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
993 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
994 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
995 startaddsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
996 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
997 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
998 movd XMM3, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
999 pshufd XMM3, XMM3, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1000 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1001 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1002 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1003 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1004 psubw XMM2, XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1005 psubw XMM3, XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1006 movdqa [ESI -32], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1007 movdqa [ESI+16-32], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1008 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1009 jb startaddsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1010 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1011 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1012 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1013 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1014 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1015 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1016 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1017 // MMX version is 4562% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1018 if (mmx() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1019 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1020 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1021 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1022 uint l = cast(ushort) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1023 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1024 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1025 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1026 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1027 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1028 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1029 movd MM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1030 pshufw MM4, MM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1031 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1032 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1033 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1034 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1035 movq MM2, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1036 movq MM3, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1037 movq MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1038 movq MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1039 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1040 psubw MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1041 psubw MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1042 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1043 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1044 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1045 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1046 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1047 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1048 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1049 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1050 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1051 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1052 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1053 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1054 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1055 *aptr++ = cast(T)(value - *bptr++); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1056 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1057 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1058 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1059 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
unittest
{
    // Checks the "scalar minus slice" array operation, c[] = 6 - a[],
    // element by element against the same expression computed in plain code.
    printf("_arrayExpSliceMinSliceAssign_s unittest\n");

    // `cpuid` and CPUID_MAX are declared elsewhere in this file; the test is
    // repeated once for every value in [0, CPUID_MAX).
    // NOTE(review): presumably each value selects a different SIMD path via
    // sse2()/mmx() -- confirm against their definitions.
    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // shift == 0 uses the allocations as returned; shift == 1 drops the
        // first element of each so the slices start one element later
        // (the original author's note: a fresh allocation is expected to be
        // 16-byte aligned, so the second pass is misaligned).
        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;

            T[] a = (new T[dim + shift])[shift .. dim + shift];
            T[] b = (new T[dim + shift])[shift .. dim + shift];
            T[] c = (new T[dim + shift])[shift .. dim + shift];

            // Fill the inputs with simple, index-derived values.
            for (int k = 0; k < dim; k++)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            // Operation under test.
            c[] = 6 - a[];

            // Every element of c must equal 6 - a[k], truncated to T.
            for (int k = 0; k < dim; k++)
            {
                T expected = cast(T)(6 - a[k]);
                if (c[k] != expected)
                {
                    printf("[%d]: %d != 6 - %d\n", k, c[k], a[k]);
                    assert(0);
                }
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1097 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1098 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1099 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1100 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1101 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1102 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1103 * a[] = b[] - c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1104 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1105 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 'u'-suffixed (wchar) entry point for a[] = b[] - c[].
 *
 * Passes its arguments through, in the same order, to
 * _arraySliceSliceMinSliceAssign_s and returns whatever that returns.
 */
T[] _arraySliceSliceMinSliceAssign_u(T[] a, T[] c, T[] b)
{
    T[] result = _arraySliceSliceMinSliceAssign_s(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1110 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 't'-suffixed (ushort) entry point for a[] = b[] - c[].
 *
 * Passes its arguments through, in the same order, to
 * _arraySliceSliceMinSliceAssign_s and returns whatever that returns.
 */
T[] _arraySliceSliceMinSliceAssign_t(T[] a, T[] c, T[] b)
{
    T[] result = _arraySliceSliceMinSliceAssign_s(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1115 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1116 T[] _arraySliceSliceMinSliceAssign_s(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1117 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1118 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1119 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1120 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1121 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1122 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1123 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1124 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1125 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1126 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1127 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1128 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1129 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1130 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1131 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1132 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1133 // SSE2 aligned version is 4129% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1134 if (sse2() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1135 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1136 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1137 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1138 if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1139 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1140 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1141 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1142 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1143 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1144 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1145 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1146 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1147 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1148 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1149 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1150 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1151 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1152 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1153 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1154 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1155 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1156 psubw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1157 psubw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1158 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1159 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1160 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1161 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1162 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1163 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1164 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1165 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1166 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1167 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1168 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1169 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1170 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1171 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1172 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1173 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1174 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1175 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1176 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1177 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1178 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1179 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1180 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1181 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1182 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1183 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1184 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1185 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1186 psubw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1187 psubw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1188 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1189 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1190 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1191 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1192 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1193 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1194 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1195 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1196 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1197 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1198 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1199 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1200 // MMX version is 2018% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1201 if (mmx() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1202 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1203 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1204 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1205 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1206 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1207 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1208 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1209 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1210 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1211 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1212 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1213 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1214 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1215 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1216 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1217 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1218 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1219 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1220 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1221 psubw MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1222 psubw MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1223 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1224 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1225 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1226 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1227 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1228 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1229 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1230 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1231 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1232 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1233 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1234 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1235 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1236 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1237 *aptr++ = cast(T)(*bptr++ - *cptr++); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1238 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1239 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1240 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1241 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/*
 * Exercises the slice expression c[] = a[] - b[] for every simulated
 * cpuid setting, once with the slices starting at the beginning of their
 * allocations and once with them starting one element in.
 * (The banner printed below names _arraySliceSliceMinSliceAssign_s as the
 * routine under test; presumably the compiler lowers the slice expression
 * to that call - confirm against the compiler's array-op lowering.)
 */
unittest
{
    printf("_arraySliceSliceMinSliceAssign_s unittest\n");

    cpuid = 0;
    while (cpuid < CPUID_MAX)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // offset 0: slice begins at the start of the allocation (the original
        //           author notes this as the 16-byte aligned case);
        // offset 1: slice begins one element in, i.e. the misaligned case.
        for (int offset = 0; offset < 2; ++offset)
        {
            const int dim = 67;

            T[] a = new T[dim + offset];
            a = a[offset .. dim + offset];
            T[] b = new T[dim + offset];
            b = b[offset .. dim + offset];
            T[] c = new T[dim + offset];
            c = c[offset .. dim + offset];

            // Fill the operands with distinct, index-dependent values; c is
            // pre-filled too so a skipped store would be detected.
            for (int k = 0; k < dim; ++k)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            c[] = a[] - b[];

            // Compare every element against the scalar computation.
            for (int k = 0; k < dim; ++k)
            {
                if (c[k] == cast(T)(a[k] - b[k]))
                    continue;

                printf("[%d]: %d != %d - %d\n", k, c[k], a[k], b[k]);
                assert(0);
            }
        }

        cpuid++;
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1279 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1280 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1281 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1282 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1283 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1284 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1285 * a[] -= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1286 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1287 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 'u' (wchar) suffixed entry point for a[] -= value.
 *
 * Forwards unchanged to _arrayExpSliceMinass_s and returns whatever that
 * routine returns.
 */
T[] _arrayExpSliceMinass_u(T[] a, T value)
{
    T[] result = _arrayExpSliceMinass_s(a, value);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1292 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 't' (ushort) suffixed entry point for a[] -= value.
 *
 * Forwards unchanged to _arrayExpSliceMinass_s and returns whatever that
 * routine returns.
 */
T[] _arrayExpSliceMinass_t(T[] a, T value)
{
    T[] result = _arrayExpSliceMinass_s(a, value);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1297 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Subtracts value from every element of a in place (a[] -= value).
 *
 * On x86 with inline assembler available, the bulk of the array is processed
 * with packed 16-bit subtraction (psubw): 16 elements per iteration when
 * sse2() reports true and the array has at least 16 elements, otherwise
 * 8 elements per iteration when mmx() reports true and the array has at
 * least 8 elements. Whatever is left over (or the whole array when neither
 * path applies) is handled by the scalar loop at the end.
 *
 * Params:
 *      a     = slice to modify in place
 *      value = amount subtracted from each element
 *
 * Returns: the same slice a that was passed in.
 */
T[] _arrayExpSliceMinass_s(T[] a, T value)
{
    //printf("_arrayExpSliceMinass_s(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
    auto aptr = a.ptr;
    auto aend = aptr + a.length;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 835% faster
        if (sse2() && a.length >= 16)
        {
            // End of the region that is a whole multiple of 16 elements.
            auto n = aptr + (a.length & ~15);

            // Replicate the 16-bit operand into both halves of a dword so a
            // dword broadcast fills every 16-bit lane.
            uint l = cast(ushort) value;
            l |= (l << 16);

            if (((cast(uint) aptr) & 15) != 0)
            {
                asm // unaligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;   // broadcast dword 0 to all four dwords

                    align 4;
                startaddsse2u:
                    movdqu XMM0, [ESI];
                    movdqu XMM1, [ESI+16];
                    add ESI, 32;
                    psubw XMM0, XMM2;
                    psubw XMM1, XMM2;
                    movdqu [ESI-32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startaddsse2u;

                    mov aptr, ESI;          // hand the position to the scalar tail loop
                }
            }
            else
            {
                asm // aligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;   // broadcast dword 0 to all four dwords

                    align 4;
                startaddsse2a:
                    movdqa XMM0, [ESI];
                    movdqa XMM1, [ESI+16];
                    add ESI, 32;
                    psubw XMM0, XMM2;
                    psubw XMM1, XMM2;
                    movdqa [ESI-32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startaddsse2a;

                    mov aptr, ESI;          // hand the position to the scalar tail loop
                }
            }
        }
        else
        // MMX version is 835% faster
        // NOTE(review): same figure as the SSE2 comment above - possibly
        // copy-pasted; confirm against a benchmark.
        if (mmx() && a.length >= 8)
        {
            // End of the region that is a whole multiple of 8 elements.
            auto n = aptr + (a.length & ~7);

            // Replicate the 16-bit operand into both halves of a dword. The
            // broadcast below then only needs punpckldq, which is part of the
            // base MMX instruction set. The previous pshufw-based broadcast
            // requires SSE (or AMD's extended MMX) and would raise an invalid
            // opcode fault on a CPU that has MMX only, which is exactly the
            // case this branch is guarded for.
            uint l = cast(ushort) value;
            l |= (l << 16);

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                movd MM2, l;
                punpckldq MM2, MM2;         // copy low dword into high dword

                align 4;
            startmmx:
                movq MM0, [ESI];
                movq MM1, [ESI+8];
                add ESI, 16;
                psubw MM0, MM2;
                psubw MM1, MM2;
                movq [ESI-16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                emms;                       // leave MMX state so x87 code works again
                mov aptr, ESI;              // hand the position to the scalar tail loop
            }
        }
    }

    // Scalar tail: the remaining elements, or the whole array when no
    // vector path was taken.
    while (aptr < aend)
        *aptr++ -= value;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1401 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Checks the slice-minus-scalar operation (a[] -= 6) once per cpuid setting,
// using slices that start at element 0 and at element 1 of fresh allocations.
unittest
{
    printf("_arrayExpSliceMinass_s unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; ++cpuid)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int offset = 0; offset < 2; ++offset)
        {
            const int dim = 67;

            // Each buffer is allocated with `offset` spare leading elements and
            // then sliced past them. The original comments describe pass 0 as
            // "aligned on 16 byte boundary" and pass 1 as the misaligned case.
            T[] a = (new T[dim + offset])[offset .. dim + offset];
            T[] b = (new T[dim + offset])[offset .. dim + offset];
            T[] c = (new T[dim + offset])[offset .. dim + offset];

            for (int idx = 0; idx < dim; ++idx)
            {
                a[idx] = cast(T)idx;
                b[idx] = cast(T)(idx + 7);
                c[idx] = cast(T)(idx * 2);
            }

            // Operation under test: copy c into a, then subtract the constant in place.
            a[] = c[];
            a[] -= 6;

            for (int idx = 0; idx < dim; ++idx)
            {
                T expected = cast(T)(c[idx] - 6);
                if (a[idx] == expected)
                    continue;

                printf("[%d]: %d != %d - 6\n", idx, a[idx], c[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1440 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1441 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1442 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1443 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1444 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1445 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1446 * a[] -= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1447 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1448 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 'u'-suffixed entry point (wchar, per the module header) for a[] -= b[].
// It forwards unchanged to the 's' implementation and returns its result.
T[] _arraySliceSliceMinass_u(T[] a, T[] b)
{
    T[] result = _arraySliceSliceMinass_s(a, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1453 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 't'-suffixed entry point (ushort, per the module header) for a[] -= b[].
// It forwards unchanged to the 's' implementation and returns its result.
T[] _arraySliceSliceMinass_t(T[] a, T[] b)
{
    T[] result = _arraySliceSliceMinass_s(a, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1458 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// In-place element-wise subtraction: a[i] -= b[i] for every element of a.
//
// Params:
//   a = destination slice, modified in place
//   b = source slice; the contract requires the same length as a and that
//       disjoint(a, b) holds
// Returns: a
//
// With x86 inline asm available, the bulk of the work is done by an SSE2 loop
// (32 bytes per pass) or, failing that, an MMX loop (16 bytes per pass).
// Whatever those loops leave over is handled by the scalar loop at the end.
T[] _arraySliceSliceMinass_s(T[] a, T[] b)
in
{
    assert (a.length == b.length);
    assert (disjoint(a, b));
}
body
{
    //printf("_arraySliceSliceMinass_s()\n");
    auto aptr = a.ptr;
    auto bptr = b.ptr;
    auto aend = aptr + a.length;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 2121% faster (original author's measurement)
        if (sse2() && a.length >= 16)
        {
            // End of the region covered by whole 16-element groups.
            auto vecEnd = aptr + (a.length & ~15);

            // Non-zero when either pointer is off a 16-byte boundary.
            auto lowBits = (cast(uint) aptr | cast(uint) bptr) & 15;

            if (lowBits != 0)
            {
                asm // unaligned case
                {
                    mov ESI, aptr;
                    mov EDI, vecEnd;
                    mov ECX, bptr;

                    align 4;
                startsse2u:
                    movdqu XMM0, [ESI];
                    movdqu XMM1, [ESI+16];
                    add ESI, 32;
                    movdqu XMM2, [ECX];
                    movdqu XMM3, [ECX+16];
                    add ECX, 32;
                    psubw XMM0, XMM2;
                    psubw XMM1, XMM3;
                    movdqu [ESI -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2u;

                    // Publish the advanced pointers for the scalar tail loop.
                    mov aptr, ESI;
                    mov bptr, ECX;
                }
            }
            else
            {
                asm // aligned case
                {
                    mov ESI, aptr;
                    mov EDI, vecEnd;
                    mov ECX, bptr;

                    align 4;
                startsse2a:
                    movdqa XMM0, [ESI];
                    movdqa XMM1, [ESI+16];
                    add ESI, 32;
                    movdqa XMM2, [ECX];
                    movdqa XMM3, [ECX+16];
                    add ECX, 32;
                    psubw XMM0, XMM2;
                    psubw XMM1, XMM3;
                    movdqa [ESI -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2a;

                    // Publish the advanced pointers for the scalar tail loop.
                    mov aptr, ESI;
                    mov bptr, ECX;
                }
            }
        }
        // MMX version is 1116% faster (original author's measurement)
        else if (mmx() && a.length >= 8)
        {
            // End of the region covered by whole 8-element groups.
            auto vecEnd = aptr + (a.length & ~7);

            asm
            {
                mov ESI, aptr;
                mov EDI, vecEnd;
                mov ECX, bptr;

                align 4;
            startmmx:
                movq MM0, [ESI];
                movq MM1, [ESI+8];
                add ESI, 16;
                movq MM2, [ECX];
                movq MM3, [ECX+8];
                add ECX, 16;
                psubw MM0, MM2;
                psubw MM1, MM3;
                movq [ESI -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                emms;
                // Publish the advanced pointers for the scalar tail loop.
                mov aptr, ESI;
                mov bptr, ECX;
            }
        }
    }

    // Scalar loop: the whole array when no vector path ran, else the remainder.
    for (; aptr < aend; ++aptr, ++bptr)
        *aptr -= *bptr;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1573 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Checks the slice-minus-slice operation (c[] -= a[]) once per cpuid setting,
// using slices that start at element 0 and at element 1 of fresh allocations.
unittest
{
    printf("_arraySliceSliceMinass_s unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; ++cpuid)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int offset = 0; offset < 2; ++offset)
        {
            const int dim = 67;

            // Each buffer is allocated with `offset` spare leading elements and
            // then sliced past them. The original comments describe pass 0 as
            // "aligned on 16 byte boundary" and pass 1 as the misaligned case.
            T[] a = (new T[dim + offset])[offset .. dim + offset];
            T[] b = (new T[dim + offset])[offset .. dim + offset];
            T[] c = (new T[dim + offset])[offset .. dim + offset];

            for (int idx = 0; idx < dim; ++idx)
            {
                a[idx] = cast(T)idx;
                b[idx] = cast(T)(idx + 7);
                c[idx] = cast(T)(idx * 2);
            }

            // Keep a copy of c in b so the expected result can be recomputed
            // after c has been modified in place.
            b[] = c[];
            c[] -= a[];

            for (int idx = 0; idx < dim; ++idx)
            {
                T expected = cast(T)(b[idx] - a[idx]);
                if (c[idx] == expected)
                    continue;

                printf("[%d]: %d != %d - %d\n", idx, c[idx], b[idx], a[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1612 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1613 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1614 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1615 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1616 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1617 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1618 * a[] = b[] * value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1619 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1620 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * wchar ('u' suffix) entry point for
 *      a[] = b[] * value
 *
 * Forwards unchanged to _arraySliceExpMulSliceAssign_s, which does the
 * actual work.
 *
 * Params:
 *      a     = destination slice
 *      value = multiplier applied to every element of b
 *      b     = source slice
 * Returns:
 *      whatever the '_s' implementation returns (the destination slice a)
 */
T[] _arraySliceExpMulSliceAssign_u(T[] a, T value, T[] b)
{
    T[] result = _arraySliceExpMulSliceAssign_s(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1625 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * ushort ('t' suffix) entry point for
 *      a[] = b[] * value
 *
 * Forwards unchanged to _arraySliceExpMulSliceAssign_s, which does the
 * actual work.
 *
 * Params:
 *      a     = destination slice
 *      value = multiplier applied to every element of b
 *      b     = source slice
 * Returns:
 *      whatever the '_s' implementation returns (the destination slice a)
 */
T[] _arraySliceExpMulSliceAssign_t(T[] a, T value, T[] b)
{
    T[] result = _arraySliceExpMulSliceAssign_s(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1630 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * short ('s' suffix) implementation of
 *      a[] = b[] * value
 *
 * Every element of b is multiplied by value and the result, truncated to T,
 * is stored at the same index of a.
 *
 * When inline x86 assembly is available the bulk of the array is processed
 * with SSE2 (16 elements per iteration, aligned or unaligned loads depending
 * on the pointers) or, failing that, with MMX (8 elements per iteration).
 * Whatever is left over -- or the whole array when no vector path applies --
 * is handled by a plain scalar loop.
 *
 * Params:
 *      a     = destination slice; must have the same length as b and must
 *              not overlap it (checked by the in-contract)
 *      value = multiplier
 *      b     = source slice
 * Returns:
 *      a
 */
T[] _arraySliceExpMulSliceAssign_s(T[] a, T value, T[] b)
in
{
    assert(a.length == b.length);
    assert(disjoint(a, b));
}
body
{
    //printf("_arraySliceExpMulSliceAssign_s()\n");
    auto aptr = a.ptr;
    auto aend = aptr + a.length;
    auto bptr = b.ptr;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 3733% faster
        if (sse2() && a.length >= 16)
        {
            // End of the part of a[] that is a whole number of
            // 16-element (32-byte) chunks.
            auto n = aptr + (a.length & ~15);

            // Multiplier replicated into both 16-bit halves of a dword;
            // pshufd below replicates that dword across the XMM register.
            uint l = cast(ushort) value;
            l |= l << 16;

            if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0)
            {
                // At least one pointer is not 16-byte aligned: use movdqu.
                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov EAX, bptr;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startsse2u:
                    add ESI, 32;
                    movdqu XMM0, [EAX];
                    movdqu XMM1, [EAX+16];
                    add EAX, 32;
                    pmullw XMM0, XMM2;
                    pmullw XMM1, XMM2;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2u;

                    // Hand the advanced pointers back to the scalar tail loop.
                    mov aptr, ESI;
                    mov bptr, EAX;
                }
            }
            else
            {
                // Both pointers are 16-byte aligned: use movdqa.
                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov EAX, bptr;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startsse2a:
                    add ESI, 32;
                    movdqa XMM0, [EAX];
                    movdqa XMM1, [EAX+16];
                    add EAX, 32;
                    pmullw XMM0, XMM2;
                    pmullw XMM1, XMM2;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2a;

                    // Hand the advanced pointers back to the scalar tail loop.
                    mov aptr, ESI;
                    mov bptr, EAX;
                }
            }
        }
        else
        // MMX version is 3733% faster
        if (mmx() && a.length >= 8)
        {
            // End of the part of a[] that is a whole number of
            // 8-element (16-byte) chunks.
            auto n = aptr + (a.length & ~7);

            // Multiplier replicated into both 16-bit halves of a dword,
            // exactly as in the SSE2 path.
            uint l = cast(ushort) value;
            l |= l << 16;

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                mov EAX, bptr;
                movd MM2, l;
                // Copy the low dword into the high dword so that all four
                // words of MM2 hold the multiplier.  punpckldq is part of the
                // baseline MMX instruction set; pshufw is not (it needs SSE
                // or AMD extended MMX) and must not be used in a path that is
                // guarded by mmx() alone.
                punpckldq MM2, MM2;

                align 4;
            startmmx:
                add ESI, 16;
                movq MM0, [EAX];
                movq MM1, [EAX+8];
                add EAX, 16;
                pmullw MM0, MM2;
                pmullw MM1, MM2;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                // Leave MMX state so later x87 code is not disturbed.
                emms;
                // Hand the advanced pointers back to the scalar tail loop.
                mov aptr, ESI;
                mov bptr, EAX;
            }
        }
    }

    // Scalar loop: remaining elements after a vector path, or everything
    // when no vector path was taken.
    while (aptr < aend)
        *aptr++ = cast(T)(*bptr++ * value);

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1750 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* Exercises the array-times-scalar operation (c[] = a[] * 6) for both an
 * unsliced allocation and one sliced off by a single element.
 */
unittest
{
    printf("_arraySliceExpMulSliceAssign_s unittest\n");

    // cpuid and CPUID_MAX are declared elsewhere in this module; presumably
    // they select which code path sse2()/mmx() report -- TODO confirm.
    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // offset 0: slices start where the allocation starts (expected to be
        // 16-byte aligned); offset 1: slices start one element later and are
        // therefore misaligned.
        for (int offset = 0; offset < 2; offset++)
        {
            const int dim = 67;

            T[] a = new T[dim + offset];
            a = a[offset .. dim + offset];
            T[] b = new T[dim + offset];
            b = b[offset .. dim + offset];
            T[] c = new T[dim + offset];
            c = c[offset .. dim + offset];

            // Fill the inputs with known values.
            for (int k = 0; k < dim; k++)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            // Operation under test.
            c[] = a[] * 6;

            // Compare every element against the scalar computation.
            for (int k = 0; k < dim; k++)
            {
                T expected = cast(T)(a[k] * 6);
                if (c[k] != expected)
                {
                    printf("[%d]: %d != %d * 6\n", k, c[k], a[k]);
                    assert(0);
                }
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1788 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1789 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1790 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1791 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1792 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1793 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1794 * a[] = b[] * c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1795 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1796 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * wchar ('u' suffix) entry point for
 *      a[] = b[] * c[]
 *
 * Forwards unchanged to _arraySliceSliceMulSliceAssign_s, which does the
 * actual work.
 *
 * Params:
 *      a = destination slice
 *      c = second operand
 *      b = first operand
 * Returns:
 *      whatever the '_s' implementation returns
 */
T[] _arraySliceSliceMulSliceAssign_u(T[] a, T[] c, T[] b)
{
    T[] result = _arraySliceSliceMulSliceAssign_s(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1801 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 't' suffix entry point (the ushort variant, per the module header) for
 *      a[] = b[] * c[]
 *
 * Forwards its arguments unchanged, and in the same order, to the 's'
 * (short) implementation and returns that call's result.
 *
 * Params:
 *      a = destination slice
 *      c = right-hand operand of the multiplication
 *      b = left-hand operand of the multiplication
 * Returns:
 *      the value returned by _arraySliceSliceMulSliceAssign_s
 */
T[] _arraySliceSliceMulSliceAssign_t(T[] a, T[] c, T[] b)
{
    T[] product = _arraySliceSliceMulSliceAssign_s(a, c, b);
    return product;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1806 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Element-wise multiply:
 *      a[] = b[] * c[]
 * Each product is truncated to T, both in the scalar loop (cast(T)) and in
 * the vector loops (pmullw keeps the low 16 bits of every product).
 *
 * On 32-bit x86 with inline asm available the bulk of the array is handled
 * by one of three unrolled loops, chosen at run time:
 *   - SSE2, unaligned loads/stores (movdqu), 32 bytes per iteration,
 *   - SSE2, aligned loads/stores (movdqa), 32 bytes per iteration, used only
 *     when a.ptr, b.ptr and c.ptr are all multiples of 16,
 *   - MMX, 16 bytes per iteration, followed by emms.
 * Whatever the vector loop leaves over (or the whole array when no vector
 * path applies) is finished by the plain scalar loop at the end.
 *
 * Params:
 *      a = destination slice; must not overlap b or c
 *      c = right-hand operand
 *      b = left-hand operand
 * Returns:
 *      a
 *
 * Preconditions (checked by the in contract):
 *      all three slices have the same length, and the destination does not
 *      overlap either operand.  The operands are only read, so they are
 *      allowed to overlap each other (e.g. a[] = b[] * b[]).
 */
T[] _arraySliceSliceMulSliceAssign_s(T[] a, T[] c, T[] b)
in
{
    assert(a.length == b.length && b.length == c.length);
    assert(disjoint(a, b));
    assert(disjoint(a, c));
    // No disjoint(b, c) check: b and c are never written, so aliasing
    // between them cannot change the result.
}
body
{
    //printf("_arraySliceSliceMulSliceAssign_s()\n");
    auto aptr = a.ptr;
    auto aend = aptr + a.length;
    auto bptr = b.ptr;
    auto cptr = c.ptr;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 2515% faster
        if (sse2() && a.length >= 16)
        {
            // End of the part that is a whole number of 16-element groups.
            auto n = aptr + (a.length & ~15);

            // Any pointer not on a 16 byte boundary forces the movdqu loop.
            if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0)
            {
                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov EAX, bptr;
                    mov ECX, cptr;

                    align 4;
                startsse2u:
                    add ESI, 32;            // advance first, store at [ESI-32]
                    movdqu XMM0, [EAX];
                    movdqu XMM2, [ECX];
                    movdqu XMM1, [EAX+16];
                    movdqu XMM3, [ECX+16];
                    add EAX, 32;
                    add ECX, 32;
                    pmullw XMM0, XMM2;
                    pmullw XMM1, XMM3;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2u;

                    // Hand the advanced pointers back for the scalar tail.
                    mov aptr, ESI;
                    mov bptr, EAX;
                    mov cptr, ECX;
                }
            }
            else
            {
                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov EAX, bptr;
                    mov ECX, cptr;

                    align 4;
                startsse2a:
                    add ESI, 32;            // advance first, store at [ESI-32]
                    movdqa XMM0, [EAX];
                    movdqa XMM2, [ECX];
                    movdqa XMM1, [EAX+16];
                    movdqa XMM3, [ECX+16];
                    add EAX, 32;
                    add ECX, 32;
                    pmullw XMM0, XMM2;
                    pmullw XMM1, XMM3;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2a;

                    // Hand the advanced pointers back for the scalar tail.
                    mov aptr, ESI;
                    mov bptr, EAX;
                    mov cptr, ECX;
                }
            }
        }
        else
        // MMX version is 2515% faster
        if (mmx() && a.length >= 8)
        {
            // End of the part that is a whole number of 8-element groups.
            auto n = aptr + (a.length & ~7);

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                mov EAX, bptr;
                mov ECX, cptr;

                align 4;
            startmmx:
                add ESI, 16;                // advance first, store at [ESI-16]
                movq MM0, [EAX];
                movq MM2, [ECX];
                movq MM1, [EAX+8];
                movq MM3, [ECX+8];
                add EAX, 16;
                add ECX, 16;
                pmullw MM0, MM2;
                pmullw MM1, MM3;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                emms;                       // leave MMX state before returning to D code
                mov aptr, ESI;
                mov bptr, EAX;
                mov cptr, ECX;
            }
        }
    }

    // Scalar tail: remaining elements, or everything if no vector path ran.
    while (aptr < aend)
        *aptr++ = cast(T)(*bptr++ * *cptr++);

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1933 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/*
 * Exercises c[] = a[] * b[] for every simulated cpuid value, once with
 * slices that start at the beginning of their allocation and once with
 * slices that start one element in.
 */
unittest
{
    printf("_arraySliceSliceMulSliceAssign_s unittest\n");

    cpuid = 0;
    while (cpuid < CPUID_MAX)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift != 2; ++shift)
        {
            const int dim = 67;

            // shift == 0: slice begins where the allocation begins (the
            // original comment expects that to be 16 byte aligned);
            // shift == 1: slice begins one element later, i.e. misaligned.
            T[] a = (new T[dim + shift])[shift .. dim + shift];
            T[] b = (new T[dim + shift])[shift .. dim + shift];
            T[] c = (new T[dim + shift])[shift .. dim + shift];

            for (int k = 0; k < dim; ++k)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            c[] = a[] * b[];

            for (int k = 0; k < dim; ++k)
            {
                if (c[k] == cast(T)(a[k] * b[k]))
                    continue;

                // First mismatch: report it and fail the test.
                printf("[%d]: %d != %d * %d\n", k, c[k], a[k], b[k]);
                assert(0);
            }
        }

        cpuid++;
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1971 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1972 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1973 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1974 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] *= value
 */

/**
 * 'u' suffix entry point (the wchar variant, per the module header).
 *
 * Passes the slice and the multiplier straight on to the 's' (short)
 * implementation and returns that call's result.
 *
 * Params:
 *      a     = slice to be multiplied in place
 *      value = the multiplier
 * Returns:
 *      the value returned by _arrayExpSliceMulass_s
 */
T[] _arrayExpSliceMulass_u(T[] a, T value)
{
    T[] scaled = _arrayExpSliceMulass_s(a, value);
    return scaled;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1984 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 't' suffix entry point (the ushort variant, per the module header) for
 *      a[] *= value
 *
 * Passes the slice and the multiplier straight on to the 's' (short)
 * implementation and returns that call's result.
 *
 * Params:
 *      a     = slice to be multiplied in place
 *      value = the multiplier
 * Returns:
 *      the value returned by _arrayExpSliceMulass_s
 */
T[] _arrayExpSliceMulass_t(T[] a, T value)
{
    T[] scaled = _arrayExpSliceMulass_s(a, value);
    return scaled;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1989 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes, in place:
 *      a[] *= value
 *
 * Every element of `a` is multiplied by `value`. On x86 with inline
 * assembler support the bulk of the array is processed with SSE2 (16
 * elements per loop iteration) or, failing that, MMX (8 elements per
 * iteration); whatever is left over is handled by a plain scalar loop.
 * The vector paths use PMULLW, which keeps the low 16 bits of each
 * product.
 *
 * NOTE(review): the vector paths assume T is a 16-bit integer type
 * (the scalar is narrowed with cast(ushort) and the loops step 32 / 16
 * bytes per 16 / 8 elements) -- confirm against the file's alias for T.
 *
 * Params:
 *      a     = slice to scale in place
 *      value = multiplier applied to every element
 *
 * Returns:
 *      the slice `a` that was passed in
 */
T[] _arrayExpSliceMulass_s(T[] a, T value)
{
    //printf("_arrayExpSliceMulass_s(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
    auto aptr = a.ptr;
    auto aend = aptr + a.length;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 2044% faster
        if (sse2() && a.length >= 16)
        {
            // End of the part that is a whole number of 16-element chunks.
            auto n = aptr + (a.length & ~15);

            // Replicate the 16-bit multiplier into both halves of a dword;
            // PSHUFD below then copies that dword into all four lanes.
            uint l = cast(ushort) value;
            l |= l << 16;

            if (((cast(uint) aptr) & 15) != 0)
            {
                // Data is not 16-byte aligned: use unaligned loads/stores.
                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startsse2u:
                    movdqu XMM0, [ESI];
                    movdqu XMM1, [ESI+16];
                    add ESI, 32;
                    pmullw XMM0, XMM2;
                    pmullw XMM1, XMM2;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2u;

                    mov aptr, ESI;      // scalar tail resumes from here
                }
            }
            else
            {
                // Data is 16-byte aligned: use aligned loads/stores.
                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movd XMM2, l;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                startsse2a:
                    movdqa XMM0, [ESI];
                    movdqa XMM1, [ESI+16];
                    add ESI, 32;
                    pmullw XMM0, XMM2;
                    pmullw XMM1, XMM2;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2a;

                    mov aptr, ESI;      // scalar tail resumes from here
                }
            }
        }
        else
        // MMX version is 2056% faster
        if (mmx() && a.length >= 8)
        {
            // End of the part that is a whole number of 8-element chunks.
            auto n = aptr + (a.length & ~7);

            // Replicate the 16-bit multiplier into both halves of a dword.
            uint l = cast(ushort) value;
            l |= l << 16;

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                movd MM2, l;
                // Broadcast the dword to both halves of MM2. PUNPCKLDQ is a
                // baseline MMX instruction; PSHUFW (used previously) needs
                // SSE / extended MMX and is not covered by the mmx() guard.
                punpckldq MM2, MM2;

                align 4;
            startmmx:
                movq MM0, [ESI];
                movq MM1, [ESI+8];
                add ESI, 16;
                pmullw MM0, MM2;
                pmullw MM1, MM2;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                emms;                   // leave MMX state before returning to FPU code
                mov aptr, ESI;          // scalar tail resumes from here
            }
        }
    }

    // Scalar loop: the whole array when no vector path ran, otherwise
    // only the elements left over after the last full chunk.
    while (aptr < aend)
        *aptr++ *= value;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2093 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Exercises `a[] *= 6` for every simulated cpuid level, once on a freshly
// allocated array and once on a slice that starts one element in.
unittest
{
    printf("_arrayExpSliceMulass_s unittest\n");

    cpuid = 0;
    while (cpuid < CPUID_MAX)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;

            // shift == 0: slice starts at the allocation (aligned on 16 byte boundary)
            // shift == 1: slice starts one element in (misaligned)
            T[] a = new T[dim + shift];
            a = a[shift .. dim + shift];
            T[] b = new T[dim + shift];
            b = b[shift .. dim + shift];
            T[] c = new T[dim + shift];
            c = c[shift .. dim + shift];

            for (int k = 0; k < dim; k++)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            // Keep a copy of the inputs, then scale the original in place.
            b[] = a[];
            a[] *= 6;

            for (int k = 0; k < dim; k++)
            {
                if (a[k] == cast(T)(b[k] * 6))
                    continue;

                printf("[%d]: %d != %d * 6\n", k, a[k], b[k]);
                assert(0);
            }
        }

        cpuid++;
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2132 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2133 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2134 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2135 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2136 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2137 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2138 * a[] *= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2139 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2140 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'u'-suffixed entry point for `a[] *= b[]`; forwards unchanged to
/// _arraySliceSliceMulass_s and returns its result.
T[] _arraySliceSliceMulass_u(T[] a, T[] b)
{
    auto result = _arraySliceSliceMulass_s(a, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2145 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 't'-suffixed entry point for `a[] *= b[]`; forwards unchanged to
/// _arraySliceSliceMulass_s and returns its result.
T[] _arraySliceSliceMulass_t(T[] a, T[] b)
{
    auto result = _arraySliceSliceMulass_s(a, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2150 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2151 T[] _arraySliceSliceMulass_s(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2152 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2153 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2154 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2155 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2156 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2157 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2158 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2159 //printf("_arraySliceSliceMulass_s()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2160 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2161 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2162 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2163 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2164 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2165 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2166 // SSE2 aligned version is 2519% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2167 if (sse2() && a.length >= 16) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2168 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2169 auto n = aptr + (a.length & ~15); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2170 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2171 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2172 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2173 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2174 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2175 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2176 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2177 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2178 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2179 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2180 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2181 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2182 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2183 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2184 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2185 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2186 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2187 pmullw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2188 pmullw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2189 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2190 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2191 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2192 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2193 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2194 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2195 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2196 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2197 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2198 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2199 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2200 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2201 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2202 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2203 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2204 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2205 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2206 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2207 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2208 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2209 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2210 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2211 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2212 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2213 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2214 pmullw XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2215 pmullw XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2216 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2217 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2218 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2219 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2220 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2221 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2222 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2223 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2224 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2225 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2226 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2227 // MMX version is 1712% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2228 if (mmx() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2229 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2230 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2231 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2232 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2233 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2234 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2235 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2236 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2237 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2238 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2239 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2240 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2241 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2242 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2243 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2244 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2245 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2246 pmullw MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2247 pmullw MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2248 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2249 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2250 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2251 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2252 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2253 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2254 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2255 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2256 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2257 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2258 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2259 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2260 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2261 *aptr++ *= *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2262 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2263 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2264 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2265 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Unit test for the in-place element-wise multiply `a[] *= c[]` on T arrays.
 *
 * For each value of the global `cpuid` in [0, CPUID_MAX) the check is run
 * twice: first on freshly allocated arrays, then on arrays that are sliced
 * one element in so that the data starts at a different offset. Every
 * result element is compared against the product of the saved original
 * value and the multiplier; a mismatch is printed and trips assert(0).
 */
unittest
{
    printf("_arraySliceSliceMulass_s unittest\n");

    // cpuid is a variable declared outside this block; walk it through
    // every value below CPUID_MAX so each pass runs with a different setting.
    cpuid = 0;
    while (cpuid < CPUID_MAX)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift != 2; ++shift)
        {
            const int count = 67;
            int total = count + shift;

            // shift == 0: use the allocation as-is (per the original note,
            // aligned on a 16 byte boundary).
            // shift == 1: drop the first element to misalign the slice.
            T[] a = new T[total];
            a = a[shift .. total];
            T[] b = new T[total];
            b = b[shift .. total];
            T[] c = new T[total];
            c = c[shift .. total];

            for (int k = 0; k < count; ++k)
            {
                c[k] = cast(T)(k * 2);
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);  // overwritten by the copy below
            }

            // Keep the pre-multiplication contents of a in b, then run the
            // operation under test.
            b[] = a[];
            a[] *= c[];

            for (int k = 0; k < count; ++k)
            {
                T expected = cast(T)(b[k] * c[k]);
                if (a[k] == expected)
                    continue;

                printf("[%d]: %d != %d * %d\n", k, a[k], b[k], c[k]);
                assert(0);
            }
        }

        ++cpuid;
    }
}