Mercurial > projects > ldc
annotate druntime/src/compiler/dmd/arraybyte.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
rev | line source |
---|---|
1458
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2 * Contains SSE2 and MMX versions of certain operations for char, byte, and |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
3 * ubyte ('a', 'g' and 'h' suffixes). |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
4 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
5 * Copyright: Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
6 * License: <a href="http://www.boost.org/LICENSE_1_0.txt>Boost License 1.0</a>. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
7 * Authors: Walter Bright, based on code originally written by Burton Radons |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
8 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
9 * Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
10 * Distributed under the Boost Software License, Version 1.0. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
11 * (See accompanying file LICENSE_1_0.txt or copy at |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
12 * http://www.boost.org/LICENSE_1_0.txt) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
13 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
14 module rt.arraybyte; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
15 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
16 import rt.util.cpuid; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
17 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
18 version (unittest) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
19 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
20 private import core.stdc.stdio : printf; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
21 /* This is so unit tests will test every CPU variant |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
22 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
23 int cpuid; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
24 const int CPUID_MAX = 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
25 bool mmx() { return cpuid == 1 && rt.util.cpuid.mmx(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
26 bool sse() { return cpuid == 2 && rt.util.cpuid.sse(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
27 bool sse2() { return cpuid == 3 && rt.util.cpuid.sse2(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
28 bool amd3dnow() { return cpuid == 4 && rt.util.cpuid.amd3dnow(); } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
29 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
30 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
31 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
32 alias rt.util.cpuid.mmx mmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
33 alias rt.util.cpuid.sse sse; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
34 alias rt.util.cpuid.sse2 sse2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
35 alias rt.util.cpuid.amd3dnow amd3dnow; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
36 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
37 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
38 //version = log; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
39 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
40 bool disjoint(T)(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
41 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
42 return (a.ptr + a.length <= b.ptr || b.ptr + b.length <= a.ptr); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
43 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
44 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
45 alias byte T; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
46 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
47 extern (C): |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
48 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
49 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
50 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
51 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
52 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
53 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
54 * a[] = b[] + value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
55 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
56 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
57 T[] _arraySliceExpAddSliceAssign_a(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
58 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
59 return _arraySliceExpAddSliceAssign_g(a, value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
60 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
61 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
62 T[] _arraySliceExpAddSliceAssign_h(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
63 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
64 return _arraySliceExpAddSliceAssign_g(a, value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
65 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
66 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
67 T[] _arraySliceExpAddSliceAssign_g(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
68 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
69 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
70 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
71 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
72 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
73 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
74 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
75 //printf("_arraySliceExpAddSliceAssign_g()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
76 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
77 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
78 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
79 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
80 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
81 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
82 // SSE2 aligned version is 1088% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
83 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
84 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
85 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
86 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
87 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
88 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
89 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
90 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
91 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
92 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
93 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
94 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
95 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
96 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
97 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
98 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
99 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
100 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
101 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
102 startaddsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
103 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
104 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
105 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
106 movdqu XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
107 movdqu XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
108 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
109 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
110 paddb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
111 paddb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
112 paddb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
113 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
114 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
115 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
116 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
117 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
118 jb startaddsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
119 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
120 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
121 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
122 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
123 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
124 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
125 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
126 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
127 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
128 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
129 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
130 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
131 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
132 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
133 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
134 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
135 startaddsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
136 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
137 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
138 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
139 movdqa XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
140 movdqa XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
141 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
142 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
143 paddb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
144 paddb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
145 paddb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
146 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
147 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
148 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
149 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
150 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
151 jb startaddsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
152 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
153 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
154 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
155 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
156 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
157 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
158 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
159 // MMX version is 1000% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
160 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
161 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
162 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
163 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
164 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
165 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
166 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
167 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
168 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
169 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
170 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
171 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
172 movd MM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
173 pshufw MM4, MM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
174 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
175 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
176 startaddmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
177 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
178 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
179 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
180 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
181 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
182 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
183 paddb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
184 paddb MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
185 paddb MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
186 paddb MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
187 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
188 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
189 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
190 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
191 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
192 jb startaddmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
193 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
194 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
195 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
196 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
197 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
198 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
199 /* trying to be fair and treat normal 32-bit cpu the same way as we do |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
200 * the SIMD units, with unrolled asm. There's not enough registers, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
201 * really. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
202 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
203 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
204 if (a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
205 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
206 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
207 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
208 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
209 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
210 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
211 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
212 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
213 mov CL, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
214 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
215 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
216 startadd386: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
217 add ESI, 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
218 mov DX, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
219 mov BX, [EAX+2]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
220 add EAX, 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
221 add BL, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
222 add BH, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
223 add DL, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
224 add DH, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
225 mov [ESI -4], DX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
226 mov [ESI+2 -4], BX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
227 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
228 jb startadd386; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
229 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
230 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
231 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
232 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
233 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
234 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
235 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
236 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
237 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
238 *aptr++ = cast(T)(*bptr++ + value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
239 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
240 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
241 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
242 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
243 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
244 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
245 printf("_arraySliceExpAddSliceAssign_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
246 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
247 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
248 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
249 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
250 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
251 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
252 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
253 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
254 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
255 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
256 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
257 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
258 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
259 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
260 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
261 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
262 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
263 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
264 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
265 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
266 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
267 c[] = a[] + 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
268 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
269 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
270 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
271 if (c[i] != cast(T)(a[i] + 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
272 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
273 printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
274 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
275 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
276 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
277 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
278 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
279 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
280 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
281 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
282 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
283 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
284 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
285 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
286 * a[] = b[] + c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
287 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
288 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
289 T[] _arraySliceSliceAddSliceAssign_a(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
290 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
291 return _arraySliceSliceAddSliceAssign_g(a, c, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
292 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
293 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
294 T[] _arraySliceSliceAddSliceAssign_h(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
295 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
296 return _arraySliceSliceAddSliceAssign_g(a, c, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
297 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
298 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
299 T[] _arraySliceSliceAddSliceAssign_g(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
300 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
301 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
302 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
303 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
304 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
305 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
306 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
307 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
308 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
309 //printf("_arraySliceSliceAddSliceAssign_g()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
310 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
311 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
312 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
313 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
314 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
315 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
316 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
317 // SSE2 aligned version is 5739% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
318 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
319 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
320 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
321 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
322 if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
323 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
324 version (log) printf("\tsse2 unaligned\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
325 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
326 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
327 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
328 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
329 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
330 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
331 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
332 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
333 startaddlsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
334 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
335 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
336 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
337 movdqu XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
338 movdqu XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
339 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
340 movdqu XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
341 movdqu XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
342 movdqu XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
343 movdqu XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
344 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
345 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
346 paddb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
347 paddb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
348 paddb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
349 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
350 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
351 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
352 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
353 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
354 jb startaddlsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
355 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
356 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
357 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
358 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
359 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
360 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
361 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
362 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
363 version (log) printf("\tsse2 aligned\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
364 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
365 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
366 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
367 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
368 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
369 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
370 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
371 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
372 startaddlsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
373 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
374 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
375 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
376 movdqa XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
377 movdqa XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
378 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
379 movdqa XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
380 movdqa XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
381 movdqa XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
382 movdqa XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
383 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
384 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
385 paddb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
386 paddb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
387 paddb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
388 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
389 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
390 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
391 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
392 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
393 jb startaddlsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
394 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
395 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
396 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
397 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
398 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
399 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
400 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
401 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
402 // MMX version is 4428% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
403 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
404 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
405 version (log) printf("\tmmx\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
406 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
407 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
408 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
409 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
410 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
411 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
412 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
413 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
414 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
415 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
416 startaddlmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
417 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
418 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
419 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
420 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
421 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
422 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
423 movq MM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
424 movq MM5, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
425 movq MM6, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
426 movq MM7, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
427 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
428 paddb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
429 paddb MM1, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
430 paddb MM2, MM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
431 paddb MM3, MM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
432 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
433 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
434 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
435 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
436 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
437 jb startaddlmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
438 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
439 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
440 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
441 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
442 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
443 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
444 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
445 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
446 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
447 version (log) if (aptr < aend) printf("\tbase\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
448 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
449 *aptr++ = cast(T)(*bptr++ + *cptr++); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
450 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
451 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
452 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
453 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
454 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
455 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
456 printf("_arraySliceSliceAddSliceAssign_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
457 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
458 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
459 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
460 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
461 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
462 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
463 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
464 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
465 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
466 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
467 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
468 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
469 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
470 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
471 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
472 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
473 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
474 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
475 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
476 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
477 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
478 c[] = a[] + b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
479 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
480 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
481 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
482 if (c[i] != cast(T)(a[i] + b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
483 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
484 printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
485 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
486 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
487 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
488 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
489 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
490 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
491 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
492 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
493 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
494 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
495 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
496 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
497 * a[] += value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
498 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
499 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
500 T[] _arrayExpSliceAddass_a(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
501 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
502 return _arrayExpSliceAddass_g(a, value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
503 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
504 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
505 T[] _arrayExpSliceAddass_h(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
506 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
507 return _arrayExpSliceAddass_g(a, value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
508 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
509 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
510 T[] _arrayExpSliceAddass_g(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
511 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
512 //printf("_arrayExpSliceAddass_g(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
513 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
514 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
515 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
516 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
517 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
518 // SSE2 aligned version is 1578% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
519 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
520 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
521 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
522 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
523 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
524 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
525 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
526 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
527 if (((cast(uint) aptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
528 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
529 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
530 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
531 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
532 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
533 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
534 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
535 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
536 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
537 startaddasssse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
538 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
539 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
540 movdqu XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
541 movdqu XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
542 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
543 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
544 paddb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
545 paddb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
546 paddb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
547 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
548 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
549 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
550 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
551 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
552 jb startaddasssse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
553 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
554 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
555 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
556 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
557 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
558 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
559 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
560 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
561 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
562 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
563 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
564 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
565 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
566 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
567 startaddasssse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
568 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
569 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
570 movdqa XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
571 movdqa XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
572 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
573 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
574 paddb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
575 paddb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
576 paddb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
577 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
578 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
579 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
580 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
581 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
582 jb startaddasssse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
583 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
584 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
585 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
586 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
587 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
588 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
589 // MMX version is 1721% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
590 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
591 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
592 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
593 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
594 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
595 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
596 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
597 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
598 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
599 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
600 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
601 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
602 movd MM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
603 pshufw MM4, MM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
604 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
605 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
606 startaddassmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
607 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
608 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
609 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
610 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
611 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
612 paddb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
613 paddb MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
614 paddb MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
615 paddb MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
616 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
617 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
618 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
619 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
620 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
621 jb startaddassmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
622 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
623 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
624 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
625 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
626 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
627 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
628 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
629 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
630 *aptr++ += value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
631 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
632 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
633 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
634 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
635 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
636 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
637 printf("_arrayExpSliceAddass_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
638 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
639 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
640 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
641 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
642 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
643 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
644 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
645 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
646 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
647 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
648 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
649 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
650 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
651 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
652 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
653 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
654 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
655 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
656 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
657 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
658 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
659 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
660 c[] += 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
661 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
662 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
663 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
664 if (c[i] != cast(T)(a[i] + 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
665 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
666 printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
667 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
668 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
669 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
670 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
671 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
672 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
673 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
674 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
675 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
676 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
677 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
678 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
679 * a[] += b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
680 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
681 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
682 T[] _arraySliceSliceAddass_a(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
683 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
684 return _arraySliceSliceAddass_g(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
685 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
686 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
687 T[] _arraySliceSliceAddass_h(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
688 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
689 return _arraySliceSliceAddass_g(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
690 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
691 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
692 T[] _arraySliceSliceAddass_g(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
693 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
694 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
695 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
696 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
697 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
698 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
699 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
700 //printf("_arraySliceSliceAddass_g()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
701 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
702 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
703 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
704 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
705 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
706 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
707 // SSE2 aligned version is 4727% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
708 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
709 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
710 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
711 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
712 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
713 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
714 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
715 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
716 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
717 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
718 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
719 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
720 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
721 startaddasslsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
722 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
723 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
724 movdqu XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
725 movdqu XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
726 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
727 movdqu XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
728 movdqu XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
729 movdqu XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
730 movdqu XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
731 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
732 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
733 paddb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
734 paddb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
735 paddb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
736 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
737 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
738 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
739 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
740 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
741 jb startaddasslsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
742 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
743 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
744 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
745 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
746 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
747 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
748 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
749 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
750 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
751 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
752 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
753 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
754 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
755 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
756 startaddasslsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
757 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
758 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
759 movdqa XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
760 movdqa XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
761 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
762 movdqa XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
763 movdqa XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
764 movdqa XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
765 movdqa XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
766 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
767 paddb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
768 paddb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
769 paddb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
770 paddb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
771 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
772 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
773 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
774 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
775 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
776 jb startaddasslsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
777 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
778 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
779 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
780 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
781 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
782 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
783 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
784 // MMX version is 3059% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
785 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
786 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
787 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
788 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
789 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
790 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
791 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
792 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
793 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
794 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
795 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
796 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
797 startaddasslmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
798 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
799 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
800 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
801 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
802 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
803 movq MM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
804 movq MM5, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
805 movq MM6, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
806 movq MM7, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
807 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
808 paddb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
809 paddb MM1, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
810 paddb MM2, MM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
811 paddb MM3, MM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
812 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
813 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
814 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
815 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
816 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
817 jb startaddasslmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
818 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
819 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
820 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
821 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
822 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
823 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
824 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
825 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
826 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
827 *aptr++ += *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
828 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
829 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
830 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
831 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
832 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
833 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
834 printf("_arraySliceSliceAddass_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
835 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
836 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
837 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
838 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
839 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
840 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
841 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
842 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
843 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
844 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
845 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
846 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
847 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
848 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
849 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
850 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
851 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
852 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
853 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
854 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
855 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
856 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
857 c[] += b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
858 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
859 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
860 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
861 if (c[i] != cast(T)(a[i] + b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
862 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
863 printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
864 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
865 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
866 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
867 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
868 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
869 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
870 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
871 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
872 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
873 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
874 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
875 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
876 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
877 * a[] = b[] - value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
878 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
879 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
880 T[] _arraySliceExpMinSliceAssign_a(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
881 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
882 return _arraySliceExpMinSliceAssign_g(a, value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
883 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
884 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
885 T[] _arraySliceExpMinSliceAssign_h(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
886 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
887 return _arraySliceExpMinSliceAssign_g(a, value, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
888 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
889 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
890 T[] _arraySliceExpMinSliceAssign_g(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
891 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
892 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
893 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
894 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
895 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
896 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
897 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
898 //printf("_arraySliceExpMinSliceAssign_g()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
899 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
900 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
901 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
902 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
903 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
904 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
905 // SSE2 aligned version is 1189% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
906 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
907 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
908 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
909 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
910 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
911 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
912 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
913 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
914 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
915 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
916 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
917 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
918 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
919 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
920 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
921 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
922 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
923 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
924 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
925 startsubsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
926 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
927 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
928 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
929 movdqu XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
930 movdqu XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
931 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
932 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
933 psubb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
934 psubb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
935 psubb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
936 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
937 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
938 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
939 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
940 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
941 jb startsubsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
942 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
943 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
944 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
945 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
946 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
947 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
948 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
949 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
950 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
951 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
952 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
953 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
954 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
955 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
956 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
957 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
958 startsubsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
959 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
960 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
961 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
962 movdqa XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
963 movdqa XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
964 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
965 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
966 psubb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
967 psubb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
968 psubb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
969 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
970 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
971 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
972 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
973 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
974 jb startsubsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
975 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
976 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
977 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
978 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
979 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
980 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
981 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
982 // MMX version is 1079% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
983 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
984 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
985 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
986 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
987 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
988 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
989 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
990 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
991 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
992 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
993 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
994 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
995 movd MM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
996 pshufw MM4, MM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
997 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
998 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
999 startsubmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1000 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1001 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1002 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1003 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1004 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1005 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1006 psubb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1007 psubb MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1008 psubb MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1009 psubb MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1010 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1011 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1012 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1013 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1014 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1015 jb startsubmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1016 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1017 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1018 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1019 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1020 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1021 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1022 // trying to be fair and treat normal 32-bit cpu the same way as we do the SIMD units, with unrolled asm. There's not enough registers, really. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1023 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1024 if (a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1025 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1026 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1027 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1028 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1029 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1030 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1031 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1032 mov CL, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1033 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1034 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1035 startsub386: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1036 add ESI, 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1037 mov DX, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1038 mov BX, [EAX+2]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1039 add EAX, 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1040 sub BL, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1041 sub BH, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1042 sub DL, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1043 sub DH, CL; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1044 mov [ESI -4], DX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1045 mov [ESI+2 -4], BX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1046 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1047 jb startsub386; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1048 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1049 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1050 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1051 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1052 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1053 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1054 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1055 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1056 *aptr++ = cast(T)(*bptr++ - value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1057 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1058 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1059 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1060 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1061 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1062 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1063 printf("_arraySliceExpMinSliceAssign_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1064 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1065 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1066 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1067 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1068 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1069 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1070 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1071 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1072 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1073 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1074 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1075 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1076 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1077 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1078 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1079 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1080 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1081 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1082 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1083 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1084 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1085 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1086 c[] = b[] - 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1087 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1088 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1089 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1090 if (c[i] != cast(T)(b[i] - 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1091 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1092 printf("[%d]: %d != %d - 6\n", i, c[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1093 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1094 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1095 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1096 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1097 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1098 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1099 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1100 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1101 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1102 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1103 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1104 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1105 * a[] = value - b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1106 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1107 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1108 T[] _arrayExpSliceMinSliceAssign_a(T[] a, T[] b, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1109 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1110 return _arrayExpSliceMinSliceAssign_g(a, b, value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1111 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1112 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1113 T[] _arrayExpSliceMinSliceAssign_h(T[] a, T[] b, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1114 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1115 return _arrayExpSliceMinSliceAssign_g(a, b, value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1116 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1117 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1118 T[] _arrayExpSliceMinSliceAssign_g(T[] a, T[] b, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1119 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1120 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1121 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1122 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1123 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1124 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1125 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1126 //printf("_arrayExpSliceMinSliceAssign_g()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1127 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1128 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1129 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1130 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1131 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1132 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1133 // SSE2 aligned version is 8748% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1134 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1135 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1136 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1137 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1138 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1139 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1140 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1141 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1142 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1143 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1144 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1145 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1146 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1147 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1148 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1149 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1150 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1151 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1152 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1153 startsubrsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1154 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1155 movdqa XMM5, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1156 movdqa XMM6, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1157 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1158 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1159 psubb XMM5, XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1160 psubb XMM6, XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1161 movdqu [ESI -64], XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1162 movdqu [ESI+16-64], XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1163 movdqa XMM5, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1164 movdqa XMM6, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1165 movdqu XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1166 movdqu XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1167 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1168 psubb XMM5, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1169 psubb XMM6, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1170 movdqu [ESI+32-64], XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1171 movdqu [ESI+48-64], XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1172 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1173 jb startsubrsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1174 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1175 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1176 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1177 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1178 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1179 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1180 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1181 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1182 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1183 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1184 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1185 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1186 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1187 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1188 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1189 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1190 startsubrsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1191 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1192 movdqa XMM5, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1193 movdqa XMM6, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1194 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1195 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1196 psubb XMM5, XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1197 psubb XMM6, XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1198 movdqa [ESI -64], XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1199 movdqa [ESI+16-64], XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1200 movdqa XMM5, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1201 movdqa XMM6, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1202 movdqa XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1203 movdqa XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1204 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1205 psubb XMM5, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1206 psubb XMM6, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1207 movdqa [ESI+32-64], XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1208 movdqa [ESI+48-64], XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1209 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1210 jb startsubrsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1211 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1212 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1213 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1214 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1215 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1216 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1217 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1218 // MMX version is 7397% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1219 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1220 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1221 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1222 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1223 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1224 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1225 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1226 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1227 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1228 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1229 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1230 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1231 movd MM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1232 pshufw MM4, MM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1233 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1234 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1235 startsubrmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1236 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1237 movq MM5, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1238 movq MM6, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1239 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1240 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1241 psubb MM5, MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1242 psubb MM6, MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1243 movq [ESI -32], MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1244 movq [ESI+8 -32], MM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1245 movq MM5, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1246 movq MM6, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1247 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1248 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1249 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1250 psubb MM5, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1251 psubb MM6, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1252 movq [ESI+16-32], MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1253 movq [ESI+24-32], MM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1254 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1255 jb startsubrmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1256 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1257 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1258 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1259 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1260 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1261 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1262 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1263 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1264 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1265 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1266 *aptr++ = cast(T)(value - *bptr++); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1267 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1268 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1269 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1270 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1271 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1272 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1273 printf("_arrayExpSliceMinSliceAssign_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1274 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1275 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1276 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1277 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1278 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1279 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1280 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1281 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1282 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1283 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1284 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1285 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1286 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1287 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1288 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1289 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1290 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1291 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1292 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1293 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1294 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1295 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1296 c[] = 6 - b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1297 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1298 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1299 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1300 if (c[i] != cast(T)(6 - b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1301 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1302 printf("[%d]: %d != 6 - %d\n", i, c[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1303 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1304 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1305 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1306 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1307 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1308 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1309 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1310 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1311 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1312 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1313 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1314 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1315 * a[] = b[] - c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1316 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1317 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1318 T[] _arraySliceSliceMinSliceAssign_a(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1319 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1320 return _arraySliceSliceMinSliceAssign_g(a, c, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1321 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1322 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1323 T[] _arraySliceSliceMinSliceAssign_h(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1324 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1325 return _arraySliceSliceMinSliceAssign_g(a, c, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1326 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1327 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1328 T[] _arraySliceSliceMinSliceAssign_g(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1329 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1330 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1331 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1332 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1333 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1334 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1335 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1336 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1337 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1338 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1339 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1340 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1341 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1342 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1343 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1344 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1345 // SSE2 aligned version is 5756% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1346 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1347 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1348 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1349 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1350 if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1351 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1352 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1353 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1354 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1355 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1356 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1357 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1358 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1359 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1360 startsublsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1361 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1362 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1363 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1364 movdqu XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1365 movdqu XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1366 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1367 movdqu XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1368 movdqu XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1369 movdqu XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1370 movdqu XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1371 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1372 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1373 psubb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1374 psubb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1375 psubb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1376 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1377 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1378 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1379 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1380 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1381 jb startsublsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1382 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1383 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1384 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1385 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1386 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1387 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1388 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1389 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1390 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1391 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1392 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1393 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1394 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1395 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1396 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1397 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1398 startsublsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1399 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1400 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1401 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1402 movdqa XMM2, [EAX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1403 movdqa XMM3, [EAX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1404 add EAX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1405 movdqa XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1406 movdqa XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1407 movdqa XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1408 movdqa XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1409 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1410 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1411 psubb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1412 psubb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1413 psubb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1414 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1415 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1416 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1417 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1418 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1419 jb startsublsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1420 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1421 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1422 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1423 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1424 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1425 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1426 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1427 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1428 // MMX version is 4428% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1429 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1430 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1431 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1432 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1433 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1434 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1435 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1436 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1437 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1438 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1439 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1440 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1441 startsublmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1442 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1443 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1444 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1445 movq MM2, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1446 movq MM3, [EAX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1447 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1448 movq MM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1449 movq MM5, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1450 movq MM6, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1451 movq MM7, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1452 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1453 psubb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1454 psubb MM1, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1455 psubb MM2, MM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1456 psubb MM3, MM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1457 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1458 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1459 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1460 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1461 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1462 jb startsublmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1463 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1464 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1465 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1466 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1467 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1468 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1469 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1470 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1471 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1472 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1473 *aptr++ = cast(T)(*bptr++ - *cptr++); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1474 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1475 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1476 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1477 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1478 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1479 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1480 printf("_arraySliceSliceMinSliceAssign_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1481 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1482 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1483 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1484 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1485 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1486 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1487 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1488 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1489 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1490 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1491 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1492 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1493 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1494 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1495 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1496 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1497 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1498 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1499 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1500 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1501 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1502 c[] = a[] - b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1503 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1504 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1505 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1506 if (c[i] != cast(T)(a[i] - b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1507 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1508 printf("[%d]: %d != %d - %d\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1509 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1510 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1511 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1512 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1513 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1514 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1515 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1516 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1517 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1518 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1519 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1520 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1521 * a[] -= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1522 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1523 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1524 T[] _arrayExpSliceMinass_a(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1525 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1526 return _arrayExpSliceMinass_g(a, value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1527 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1528 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1529 T[] _arrayExpSliceMinass_h(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1530 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1531 return _arrayExpSliceMinass_g(a, value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1532 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1533 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1534 T[] _arrayExpSliceMinass_g(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1535 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1536 //printf("_arrayExpSliceMinass_g(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1537 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1538 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1539 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1540 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1541 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1542 // SSE2 aligned version is 1577% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1543 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1544 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1545 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1546 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1547 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1548 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1549 l |= (l << 16); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1550 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1551 if (((cast(uint) aptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1552 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1553 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1554 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1555 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1556 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1557 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1558 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1559 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1560 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1561 startsubasssse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1562 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1563 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1564 movdqu XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1565 movdqu XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1566 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1567 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1568 psubb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1569 psubb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1570 psubb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1571 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1572 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1573 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1574 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1575 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1576 jb startsubasssse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1577 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1578 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1579 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1580 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1581 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1582 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1583 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1584 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1585 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1586 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1587 movd XMM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1588 pshufd XMM4, XMM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1589 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1590 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1591 startsubasssse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1592 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1593 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1594 movdqa XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1595 movdqa XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1596 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1597 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1598 psubb XMM1, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1599 psubb XMM2, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1600 psubb XMM3, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1601 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1602 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1603 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1604 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1605 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1606 jb startsubasssse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1607 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1608 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1609 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1610 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1611 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1612 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1613 // MMX version is 1577% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1614 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1615 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1616 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1617 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1618 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1619 uint l = cast(ubyte) value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1620 l |= (l << 8); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1621 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1622 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1623 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1624 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1625 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1626 movd MM4, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1627 pshufw MM4, MM4, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1628 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1629 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1630 startsubassmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1631 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1632 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1633 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1634 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1635 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1636 psubb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1637 psubb MM1, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1638 psubb MM2, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1639 psubb MM3, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1640 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1641 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1642 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1643 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1644 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1645 jb startsubassmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1646 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1647 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1648 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1649 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1650 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1651 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1652 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1653 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1654 *aptr++ -= value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1655 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1656 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1657 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1658 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1659 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1660 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1661 printf("_arrayExpSliceMinass_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1662 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1663 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1664 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1665 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1666 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1667 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1668 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1669 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1670 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1671 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1672 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1673 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1674 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1675 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1676 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1677 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1678 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1679 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1680 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1681 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1682 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1683 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1684 c[] -= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1685 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1686 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1687 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1688 if (c[i] != cast(T)(a[i] - 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1689 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1690 printf("[%d]: %d != %d - 6\n", i, c[i], a[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1691 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1692 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1693 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1694 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1695 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1696 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1697 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1698 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1699 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1700 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1701 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1702 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1703 * a[] -= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1704 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1705 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1706 T[] _arraySliceSliceMinass_a(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1707 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1708 return _arraySliceSliceMinass_g(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1709 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1710 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1711 T[] _arraySliceSliceMinass_h(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1712 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1713 return _arraySliceSliceMinass_g(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1714 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1715 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1716 T[] _arraySliceSliceMinass_g(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1717 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1718 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1719 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1720 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1721 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1722 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1723 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1724 //printf("_arraySliceSliceMinass_g()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1725 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1726 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1727 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1728 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1729 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1730 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1731 // SSE2 aligned version is 4800% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1732 if (sse2() && a.length >= 64) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1733 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1734 auto n = aptr + (a.length & ~63); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1735 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1736 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1737 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1738 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1739 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1740 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1741 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1742 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1743 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1744 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1745 startsubasslsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1746 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1747 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1748 movdqu XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1749 movdqu XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1750 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1751 movdqu XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1752 movdqu XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1753 movdqu XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1754 movdqu XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1755 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1756 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1757 psubb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1758 psubb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1759 psubb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1760 movdqu [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1761 movdqu [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1762 movdqu [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1763 movdqu [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1764 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1765 jb startsubasslsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1766 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1767 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1768 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1769 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1770 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1771 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1772 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1773 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1774 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1775 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1776 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1777 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1778 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1779 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1780 startsubasslsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1781 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1782 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1783 movdqa XMM2, [ESI+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1784 movdqa XMM3, [ESI+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1785 add ESI, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1786 movdqa XMM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1787 movdqa XMM5, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1788 movdqa XMM6, [ECX+32]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1789 movdqa XMM7, [ECX+48]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1790 add ECX, 64; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1791 psubb XMM0, XMM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1792 psubb XMM1, XMM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1793 psubb XMM2, XMM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1794 psubb XMM3, XMM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1795 movdqa [ESI -64], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1796 movdqa [ESI+16-64], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1797 movdqa [ESI+32-64], XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1798 movdqa [ESI+48-64], XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1799 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1800 jb startsubasslsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1801 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1802 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1803 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1804 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1805 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1806 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1807 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1808 // MMX version is 3107% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1809 if (mmx() && a.length >= 32) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1810 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1811 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1812 auto n = aptr + (a.length & ~31); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1813 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1814 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1815 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1816 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1817 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1818 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1819 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1820 align 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1821 startsubasslmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1822 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1823 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1824 movq MM2, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1825 movq MM3, [ESI+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1826 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1827 movq MM4, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1828 movq MM5, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1829 movq MM6, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1830 movq MM7, [ECX+24]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1831 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1832 psubb MM0, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1833 psubb MM1, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1834 psubb MM2, MM6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1835 psubb MM3, MM7; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1836 movq [ESI -32], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1837 movq [ESI+8 -32], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1838 movq [ESI+16-32], MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1839 movq [ESI+24-32], MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1840 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1841 jb startsubasslmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1842 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1843 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1844 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1845 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1846 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1847 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1848 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1849 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1850 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1851 *aptr++ -= *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1852 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1853 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1854 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1855 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1856 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1857 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1858 printf("_arraySliceSliceMinass_g unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1859 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1860 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1861 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1862 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1863 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1864 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1865 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1866 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1867 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1868 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1869 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1870 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1871 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1872 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1873 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1874 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1875 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1876 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1877 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1878 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1879 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1880 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1881 c[] -= b[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1882 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1883 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1884 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1885 if (c[i] != cast(T)(a[i] - b[i])) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1886 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1887 printf("[%d]: %d != %d - %d\n", i, c[i], a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1888 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1889 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1890 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1891 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1892 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1893 } |