Mercurial > projects > ldc
annotate druntime/src/compiler/dmd/arrayint.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
rev | line source |
---|---|
1458
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2 * Contains MMX versions of certain operations for dchar, int, and uint ('w', |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
3 * 'i' and 'k' suffixes). |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
4 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
5 * Copyright: Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
6 * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
7 * Authors: Walter Bright, based on code originally written by Burton Radons |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
8 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
9 * Copyright Digital Mars 2008 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
10 * Distributed under the Boost Software License, Version 1.0. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
11 * (See accompanying file LICENSE_1_0.txt or copy at |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
12 * http://www.boost.org/LICENSE_1_0.txt) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
13 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
14 module rt.arrayint; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
15 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
16 private import rt.util.cpuid; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
17 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
version (unittest)
{
    private import core.stdc.stdio : printf;

    /* Test-only selector for the CPU feature predicates below.  Each
     * predicate reports true only while `cpuid` holds that predicate's own
     * index AND the real hardware check in rt.util.cpuid succeeds, so that
     * unit tests can exercise every CPU variant one at a time.
     */
    int cpuid;

    /* Highest selector value used by the predicates below. */
    const int CPUID_MAX = 4;

    /* MMX path: enabled only when cpuid == 1. */
    bool mmx()
    {
        bool selected = (cpuid == 1);
        return selected && rt.util.cpuid.mmx();
    }

    /* SSE path: enabled only when cpuid == 2. */
    bool sse()
    {
        bool selected = (cpuid == 2);
        return selected && rt.util.cpuid.sse();
    }

    /* SSE2 path: enabled only when cpuid == 3. */
    bool sse2()
    {
        bool selected = (cpuid == 3);
        return selected && rt.util.cpuid.sse2();
    }

    /* 3DNow! path: enabled only when cpuid == 4. */
    bool amd3dnow()
    {
        bool selected = (cpuid == 4);
        return selected && rt.util.cpuid.amd3dnow();
    }
}
else
{
    /* Outside unit tests the feature predicates are plain aliases of the
     * detection routines in rt.util.cpuid.
     */
    alias rt.util.cpuid.mmx      mmx;
    alias rt.util.cpuid.sse      sse;
    alias rt.util.cpuid.sse2     sse2;
    alias rt.util.cpuid.amd3dnow amd3dnow;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
37 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
38 //version = log; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
39 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Reports whether two slices refer to non-overlapping memory.
 *
 * Params:
 *      a = first slice
 *      b = second slice
 *
 * Returns:
 *      true when `a` ends at or before the start of `b`, or `b` ends at or
 *      before the start of `a`; false otherwise.
 */
bool disjoint(T)(T[] a, T[] b)
{
    // a lies entirely before b
    if (a.ptr + a.length <= b.ptr)
        return true;

    // otherwise the slices are disjoint only if b lies entirely before a
    return b.ptr + b.length <= a.ptr;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
44 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
45 alias int T; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
46 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
47 extern (C): |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
48 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
49 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
50 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
51 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
52 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
53 * a[] = b[] + value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
54 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
55 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* 'w' suffix entry point (dchar, per the module header) for
 *      a[] = b[] + value
 * It simply forwards to the 'i' (int) implementation and hands back
 * whatever that returns.
 */
T[] _arraySliceExpAddSliceAssign_w(T[] a, T value, T[] b)
{
    T[] result = _arraySliceExpAddSliceAssign_i(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
60 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* 'k' suffix entry point (uint, per the module header) for
 *      a[] = b[] + value
 * It simply forwards to the 'i' (int) implementation and hands back
 * whatever that returns.
 */
T[] _arraySliceExpAddSliceAssign_k(T[] a, T value, T[] b)
{
    T[] result = _arraySliceExpAddSliceAssign_i(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
65 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
66 T[] _arraySliceExpAddSliceAssign_i(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
67 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
68 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
69 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
70 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
71 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
72 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
73 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
74 //printf("_arraySliceExpAddSliceAssign_i()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
75 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
76 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
77 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
78 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
79 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
80 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
81 // SSE2 aligned version is 380% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
82 if (sse2() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
83 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
84 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
85 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
86 uint l = value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
87 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
88 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
89 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
90 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
91 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
92 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
93 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
94 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
95 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
96 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
97 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
98 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
99 startaddsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
100 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
101 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
102 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
103 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
104 paddd XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
105 paddd XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
106 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
107 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
108 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
109 jb startaddsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
110 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
111 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
112 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
113 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
114 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
115 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
116 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
117 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
118 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
119 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
120 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
121 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
122 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
123 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
124 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
125 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
126 startaddsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
127 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
128 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
129 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
130 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
131 paddd XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
132 paddd XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
133 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
134 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
135 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
136 jb startaddsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
137 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
138 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
139 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
140 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
141 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
142 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
143 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
144 // MMX version is 298% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
145 if (mmx() && a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
146 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
147 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
148 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
149 ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
150 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
151 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
152 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
153 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
154 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
155 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
156 movq MM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
157 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
158 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
159 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
160 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
161 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
162 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
163 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
164 paddd MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
165 paddd MM1, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
166 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
167 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
168 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
169 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
170 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
171 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
172 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
173 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
174 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
175 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
176 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
177 if (a.length >= 2) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
178 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
179 auto n = aptr + (a.length & ~1); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
180 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
181 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
182 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
183 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
184 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
185 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
186 mov EDX, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
187 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
188 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
189 start386: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
190 add ESI, 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
191 mov EBX, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
192 mov ECX, [EAX+4]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
193 add EAX, 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
194 add EBX, EDX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
195 add ECX, EDX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
196 mov [ESI -8], EBX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
197 mov [ESI+4-8], ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
198 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
199 jb start386; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
200 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
201 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
202 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
203 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
204 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
205 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
206 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
207 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
208 *aptr++ = *bptr++ + value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
209 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
210 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
211 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
212 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/*
 * Exercises `c[] = a[] + 6` for every simulated CPU level (the module-level
 * `cpuid` counter is stepped from 0 up to CPUID_MAX) and for two slice
 * offsets: 0 (start of a fresh allocation) and 1 (shifted by one element,
 * which the original comments describe as the misaligned case).
 */
unittest
{
    printf("_arraySliceExpAddSliceAssign_i unittest\n");

    // 67 elements = 64 + 3, so the slice is not a multiple of 4 or 8 elements.
    const int dim = 67;

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; shift++)
        {
            // Allocate dim + shift elements and drop the first `shift` of
            // them, so the second pass starts one element into the buffer.
            T[] a = (new T[dim + shift])[shift .. dim + shift];
            T[] b = (new T[dim + shift])[shift .. dim + shift];
            T[] c = (new T[dim + shift])[shift .. dim + shift];

            // Seed the three slices with distinct, predictable patterns.
            for (int idx = 0; idx < dim; idx++)
            {
                a[idx] = cast(T)idx;
                b[idx] = cast(T)(idx + 7);
                c[idx] = cast(T)(idx * 2);
            }

            // Operation under test.
            c[] = a[] + 6;

            // Compare every element against the scalar computation.
            for (int idx = 0; idx < dim; idx++)
            {
                if (c[idx] == cast(T)(a[idx] + 6))
                    continue;

                printf("[%d]: %d != %d + 6\n", idx, c[idx], a[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
250 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
251 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
252 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
253 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
254 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
255 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
256 * a[] = b[] + c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
257 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
258 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 'w'-suffixed entry point for a[] = b[] + c[].
 *
 * Forwards its arguments unchanged to _arraySliceSliceAddSliceAssign_i.
 * NOTE(review): the file header lists the 'w' suffix alongside dchar --
 * confirm against the compiler's name mangling for array operations.
 *
 * Params:
 *      a = destination slice
 *      c = second source slice
 *      b = first source slice
 * Returns: whatever the _i implementation returns.
 */
T[] _arraySliceSliceAddSliceAssign_w(T[] a, T[] c, T[] b)
{
    auto result = _arraySliceSliceAddSliceAssign_i(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
263 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 'k'-suffixed entry point for a[] = b[] + c[].
 *
 * Forwards its arguments unchanged to _arraySliceSliceAddSliceAssign_i.
 * NOTE(review): the file header lists the 'k' suffix alongside uint --
 * confirm against the compiler's name mangling for array operations.
 *
 * Params:
 *      a = destination slice
 *      c = second source slice
 *      b = first source slice
 * Returns: whatever the _i implementation returns.
 */
T[] _arraySliceSliceAddSliceAssign_k(T[] a, T[] c, T[] b)
{
    auto result = _arraySliceSliceAddSliceAssign_i(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
268 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Element-wise addition of two slices into a third: a[] = b[] + c[].
 *
 * With x86 inline assembly available, the bulk of the work is done 8
 * elements per iteration with SSE2 (when sse2() is true and there are at
 * least 8 elements) or 4 elements per iteration with MMX (when mmx() is
 * true and there are at least 4 elements). Whatever is left over -- or the
 * whole slice when neither path applies -- is handled by a scalar loop.
 *
 * Params:
 *      a = destination slice
 *      c = second source slice (note: listed before b in the signature)
 *      b = first source slice
 *
 * Returns: a
 *
 * Contract: all three slices must have the same length and each pair must
 * satisfy disjoint().
 */
T[] _arraySliceSliceAddSliceAssign_i(T[] a, T[] c, T[] b)
in
{
    assert(a.length == b.length);
    assert(b.length == c.length);
    assert(disjoint(a, b));
    assert(disjoint(a, c));
    assert(disjoint(b, c));
}
body
{
    //printf("_arraySliceSliceAddSliceAssign_i()\n");
    auto dst    = a.ptr;
    auto dstEnd = dst + a.length;
    auto lhs    = b.ptr;
    auto rhs    = c.ptr;

    version (D_InlineAsm_X86)
    {
        // Original author's measurement: SSE2 aligned version is 1710% faster
        if (sse2() && a.length >= 8)
        {
            // End of the region that is a whole number of 8-element groups.
            auto simdEnd = dst + (a.length & ~7);

            // The aligned loads/stores need all three pointers on a
            // 16-byte boundary; one stray low bit in any of them forces
            // the unaligned variant.
            bool allAligned =
                ((cast(uint) dst | cast(uint) lhs | cast(uint) rhs) & 15) == 0;

            if (allAligned)
            {
                asm // aligned case
                {
                    mov ESI, dst;
                    mov EDI, simdEnd;
                    mov EAX, lhs;
                    mov ECX, rhs;

                    align 4;
                loopSse2Aligned:
                    add ESI, 32;
                    movdqa XMM0, [EAX];
                    movdqa XMM2, [ECX];
                    movdqa XMM1, [EAX+16];
                    movdqa XMM3, [ECX+16];
                    add EAX, 32;
                    add ECX, 32;
                    paddd XMM0, XMM2;
                    paddd XMM1, XMM3;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb loopSse2Aligned;

                    // Hand the advanced cursors back to the scalar tail.
                    mov dst, ESI;
                    mov lhs, EAX;
                    mov rhs, ECX;
                }
            }
            else
            {
                asm // unaligned case
                {
                    mov ESI, dst;
                    mov EDI, simdEnd;
                    mov EAX, lhs;
                    mov ECX, rhs;

                    align 4;
                loopSse2Unaligned:
                    add ESI, 32;
                    movdqu XMM0, [EAX];
                    movdqu XMM2, [ECX];
                    movdqu XMM1, [EAX+16];
                    movdqu XMM3, [ECX+16];
                    add EAX, 32;
                    add ECX, 32;
                    paddd XMM0, XMM2;
                    paddd XMM1, XMM3;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb loopSse2Unaligned;

                    // Hand the advanced cursors back to the scalar tail.
                    mov dst, ESI;
                    mov lhs, EAX;
                    mov rhs, ECX;
                }
            }
        }
        // Original author's measurement: MMX version is 995% faster
        else if (mmx() && a.length >= 4)
        {
            // End of the region that is a whole number of 4-element groups.
            auto simdEnd = dst + (a.length & ~3);

            asm
            {
                mov ESI, dst;
                mov EDI, simdEnd;
                mov EAX, lhs;
                mov ECX, rhs;

                align 4;
            loopMmx:
                add ESI, 16;
                movq MM0, [EAX];
                movq MM2, [ECX];
                movq MM1, [EAX+8];
                movq MM3, [ECX+8];
                add EAX, 16;
                add ECX, 16;
                paddd MM0, MM2;
                paddd MM1, MM3;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb loopMmx;

                // Leave MMX state before any later x87 floating-point use.
                emms;
                mov dst, ESI;
                mov lhs, EAX;
                mov rhs, ECX;
            }
        }
    }

    // Scalar path: the remainder after a SIMD loop, or the entire slice
    // when no SIMD path was taken.
    for (; dst < dstEnd; ++dst, ++lhs, ++rhs)
        *dst = *lhs + *rhs;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
396 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
unittest
{
    /* Checks the array operation `c[] = a[] + b[]` element by element.
     *
     * The check is repeated for every value of the module-level `cpuid`
     * variable below CPUID_MAX (both are declared elsewhere in this file;
     * presumably `cpuid` selects which code path the CPU-feature helpers
     * report -- confirm against their definitions), and for two slice
     * offsets so that both start positions of the data are exercised.
     */
    printf("_arraySliceSliceAddSliceAssign_i unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;

            // Allocate `shift` extra elements and then drop them from the
            // front: with shift == 0 the slice starts at the allocation
            // (which the original author notes is 16-byte aligned), with
            // shift == 1 it starts one element further on (misaligned).
            T[] a = new T[dim + shift];
            a = a[shift .. dim + shift];
            T[] b = new T[dim + shift];
            b = b[shift .. dim + shift];
            T[] c = new T[dim + shift];
            c = c[shift .. dim + shift];

            // Fill the operands with distinct, predictable values.
            for (int idx = 0; idx < dim; idx++)
            {
                a[idx] = cast(T)idx;
                b[idx] = cast(T)(idx + 7);
                c[idx] = cast(T)(idx * 2);
            }

            // Operation under test.
            c[] = a[] + b[];

            // Every element of the result must equal the scalar sum.
            for (int idx = 0; idx < dim; idx++)
            {
                if (c[idx] == cast(T)(a[idx] + b[idx]))
                    continue;

                printf("[%d]: %d != %d + %d\n", idx, c[idx], a[idx], b[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
434 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
435 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
436 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
437 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
438 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
439 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
440 * a[] += value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
441 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
442 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'w'-suffix entry point for `a[] += value`.
/// Forwards unchanged to `_arrayExpSliceAddass_i` and returns its result.
T[] _arrayExpSliceAddass_w(T[] a, T value)
{
    auto updated = _arrayExpSliceAddass_i(a, value);
    return updated;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
447 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'k'-suffix entry point for `a[] += value`.
/// Forwards unchanged to `_arrayExpSliceAddass_i` and returns its result.
T[] _arrayExpSliceAddass_k(T[] a, T value)
{
    auto updated = _arrayExpSliceAddass_i(a, value);
    return updated;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
452 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Adds `value` to every element of `a` in place (`a[] += value`).
 *
 * When compiled with D_InlineAsm_X86 the bulk of the array is processed by
 * one of three hand-written loops, chosen at run time:
 *   - SSE2 (needs sse2() and at least 8 elements): 8 elements per iteration,
 *     with separate aligned and unaligned variants;
 *   - MMX  (needs mmx() and at least 4 elements): 4 elements per iteration;
 *   - plain 386 (at least 2 elements): 2 elements per iteration.
 * Whatever the selected loop leaves over (or the whole array when no loop
 * ran) is handled by the scalar loop at the end.
 *
 * Params:
 *   a     = slice to update in place
 *   value = amount added to each element
 *
 * Returns: the same slice `a`.
 */
T[] _arrayExpSliceAddass_i(T[] a, T value)
{
    //printf("_arrayExpSliceAddass_i(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
    auto cursor = a.ptr;                // next element still to be processed
    auto finish = cursor + a.length;    // one past the last element

    version (D_InlineAsm_X86)
    {
        if (sse2() && a.length >= 8)
        {
            // Original author's note: SSE2 aligned version is 83% faster.
            // Process only whole groups of 8 elements here.
            auto blockEnd = cursor + (a.length & ~7);

            // 32-bit copy of the addend; broadcast to all four lanes of
            // XMM2 by the pshufd below.
            uint splat = value;

            if (((cast(uint) cursor) & 15) == 0)
            {
                asm // aligned case
                {
                    mov ESI, cursor;
                    mov EDI, blockEnd;
                    movd XMM2, splat;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                sse2AlignedLoop:
                    movdqa XMM0, [ESI];
                    movdqa XMM1, [ESI+16];
                    add ESI, 32;
                    paddd XMM0, XMM2;
                    paddd XMM1, XMM2;
                    movdqa [ESI -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb sse2AlignedLoop;

                    // Hand the advanced pointer back to the D code.
                    mov cursor, ESI;
                }
            }
            else
            {
                asm // unaligned case
                {
                    mov ESI, cursor;
                    mov EDI, blockEnd;
                    movd XMM2, splat;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                sse2UnalignedLoop:
                    movdqu XMM0, [ESI];
                    movdqu XMM1, [ESI+16];
                    add ESI, 32;
                    paddd XMM0, XMM2;
                    paddd XMM1, XMM2;
                    movdqu [ESI -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb sse2UnalignedLoop;

                    // Hand the advanced pointer back to the D code.
                    mov cursor, ESI;
                }
            }
        }
        else if (mmx() && a.length >= 4)
        {
            // Original author's note: MMX version is 81% faster.
            // Process only whole groups of 4 elements here.
            auto blockEnd = cursor + (a.length & ~3);

            // Two copies of the 32-bit addend packed into one 64-bit word.
            ulong lane = cast(uint) value;
            ulong splat64 = lane | (lane << 32);

            asm
            {
                mov ESI, cursor;
                mov EDI, blockEnd;
                movq MM2, splat64;

                align 4;
            mmxLoop:
                movq MM0, [ESI];
                movq MM1, [ESI+8];
                add ESI, 16;
                paddd MM0, MM2;
                paddd MM1, MM2;
                movq [ESI -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb mmxLoop;

                // Leave MMX state before returning to ordinary code.
                emms;
                mov cursor, ESI;
            }
        }
        else if (a.length >= 2)
        {
            // Plain integer loop, two elements per iteration.
            auto blockEnd = cursor + (a.length & ~1);

            asm
            {
                mov ESI, cursor;
                mov EDI, blockEnd;
                mov EDX, value;

                align 4;
            i386Loop:
                mov EBX, [ESI];
                mov ECX, [ESI+4];
                add ESI, 8;
                add EBX, EDX;
                add ECX, EDX;
                mov [ESI -8], EBX;
                mov [ESI+4-8], ECX;
                cmp ESI, EDI;
                jb i386Loop;

                mov cursor, ESI;
            }
        }
    }

    // Scalar tail: whatever the selected loop above did not cover.
    for (; cursor < finish; ++cursor)
        *cursor += value;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
580 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
581 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
582 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
583 printf("_arrayExpSliceAddass_i unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
584 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
585 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
586 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
587 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
588 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
589 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
590 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
591 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
592 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
593 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
594 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
595 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
596 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
597 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
598 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
599 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
600 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
601 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
602 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
603 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
604 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
605 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
606 a[] += 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
607 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
608 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
609 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
610 if (a[i] != cast(T)(c[i] + 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
611 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
612 printf("[%d]: %d != %d + 6\n", i, a[i], c[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
613 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
614 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
615 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
616 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
617 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
618 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
619 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
620 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
621 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
622 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
623 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
624 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
625 * a[] += b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
626 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
627 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'w'-suffixed entry point for a[] += b[]; forwards unchanged to the 'i'
/// implementation and hands back whatever it returns.
T[] _arraySliceSliceAddass_w(T[] a, T[] b)
{
    T[] summed = _arraySliceSliceAddass_i(a, b);
    return summed;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
632 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'k'-suffixed entry point for a[] += b[]; forwards unchanged to the 'i'
/// implementation and hands back whatever it returns.
T[] _arraySliceSliceAddass_k(T[] a, T[] b)
{
    T[] summed = _arraySliceSliceAddass_i(a, b);
    return summed;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
637 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Adds b[] element-wise into a[] in place.
 *
 * Params:
 *   a = destination slice; each element becomes a[i] + b[i]
 *   b = addend slice; the in-contract requires the same length as a and
 *       that disjoint(a, b) holds
 *
 * Returns: a
 */
T[] _arraySliceSliceAddass_i(T[] a, T[] b)
in
{
    assert (a.length == b.length);
    assert (disjoint(a, b));
}
body
{
    //printf("_arraySliceSliceAddass_i()\n");
    auto aptr = a.ptr;
    auto aend = aptr + a.length;
    auto bptr = b.ptr;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 695% faster
        if (sse2() && a.length >= 8)
        {
            // End of the part handled by the vector loop: the length rounded
            // down to a multiple of 8 elements (the loop consumes 32 bytes
            // per iteration).
            auto n = aptr + (a.length & ~7);

            // size_t is the pointer-sized integer; use it rather than uint so
            // the alignment test never truncates an address.
            if (((cast(size_t) aptr | cast(size_t) bptr) & 15) != 0)
            {
                asm // unaligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov ECX, bptr;

                    align 4;
                startsse2u:
                    movdqu XMM0, [ESI];
                    movdqu XMM2, [ECX];
                    movdqu XMM1, [ESI+16];
                    movdqu XMM3, [ECX+16];
                    add ESI, 32;
                    add ECX, 32;
                    paddd XMM0, XMM2;
                    paddd XMM1, XMM3;
                    // ESI was already advanced, hence the negative offsets.
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2u;

                    // Publish the advanced pointers for the scalar tail loop.
                    mov aptr, ESI;
                    mov bptr, ECX;
                }
            }
            else
            {
                asm // aligned case
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    mov ECX, bptr;

                    align 4;
                startsse2a:
                    movdqa XMM0, [ESI];
                    movdqa XMM2, [ECX];
                    movdqa XMM1, [ESI+16];
                    movdqa XMM3, [ECX+16];
                    add ESI, 32;
                    add ECX, 32;
                    paddd XMM0, XMM2;
                    paddd XMM1, XMM3;
                    // ESI was already advanced, hence the negative offsets.
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb startsse2a;

                    // Publish the advanced pointers for the scalar tail loop.
                    mov aptr, ESI;
                    mov bptr, ECX;
                }
            }
        }
        // MMX version is 471% faster
        else if (mmx() && a.length >= 4)
        {
            // End of the part handled by the vector loop: the length rounded
            // down to a multiple of 4 elements (the loop consumes 16 bytes
            // per iteration).
            auto n = aptr + (a.length & ~3);

            asm
            {
                mov ESI, aptr;
                mov EDI, n;
                mov ECX, bptr;

                align 4;
            startmmx:
                movq MM0, [ESI];
                movq MM2, [ECX];
                movq MM1, [ESI+8];
                movq MM3, [ECX+8];
                add ESI, 16;
                add ECX, 16;
                paddd MM0, MM2;
                paddd MM1, MM3;
                // ESI was already advanced, hence the negative offsets.
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb startmmx;

                emms;
                // Publish the advanced pointers for the scalar tail loop.
                mov aptr, ESI;
                mov bptr, ECX;
            }
        }
    }

    // Scalar loop: handles the whole array when no asm path ran, otherwise
    // only the elements left over after the vector loop.
    while (aptr < aend)
        *aptr++ += *bptr++;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
753 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Checks c[] += a[] against a saved copy of c, for every cpuid setting and
// for both an unshifted and a one-element-shifted data pointer.
unittest
{
    printf("_arraySliceSliceAddass_i unittest\n");

    // cpuid and CPUID_MAX are declared outside this block.
    // NOTE(review): presumably they steer what sse2()/mmx() report - confirm.
    cpuid = 0;
    while (cpuid < CPUID_MAX)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;
            int total = dim + shift;

            // Original author's note: a fresh allocation is aligned on a
            // 16 byte boundary; slicing off `shift` leading elements
            // misaligns the data on the second pass.
            T[] a = (new T[total])[shift .. total];
            T[] b = (new T[total])[shift .. total];
            T[] c = (new T[total])[shift .. total];

            for (int k = 0; k < dim; k++)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            b[] = c[];   // remember c's values from before the addition
            c[] += a[];  // the slice += slice operation being checked

            for (int k = 0; k < dim; k++)
            {
                if (c[k] == cast(T)(b[k] + a[k]))
                    continue;

                printf("[%d]: %d != %d + %d\n", k, c[k], b[k], a[k]);
                assert(0);
            }
        }

        cpuid++;
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
792 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
793 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
794 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
795 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
796 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
797 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
798 * a[] = b[] - value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
799 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
800 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'w'-suffixed entry point for a[] = b[] - value; forwards unchanged to the
/// 'i' implementation and hands back whatever it returns.
T[] _arraySliceExpMinSliceAssign_w(T[] a, T value, T[] b)
{
    T[] difference = _arraySliceExpMinSliceAssign_i(a, value, b);
    return difference;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
805 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * 'k'-suffix entry point for a[] = b[] - value.
 *
 * It takes the same element type T as the 'i' variant, so the call is
 * handed straight to _arraySliceExpMinSliceAssign_i.
 *
 * Params:
 *      a     = destination slice
 *      value = scalar subtracted from each element of b
 *      b     = source slice
 * Returns: the value returned by _arraySliceExpMinSliceAssign_i.
 */
T[] _arraySliceExpMinSliceAssign_k(T[] a, T value, T[] b)
{
    auto result = _arraySliceExpMinSliceAssign_i(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
810 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Kernel for a[] = b[] - value.
 *
 * When compiled with D_InlineAsm_X86, most of the array is handled by one
 * inline-assembly loop, picked in this order:
 *   1. SSE2, 8 elements (32 bytes) per iteration, if sse2() holds and
 *      a.length >= 8; movdqa is used when both pointers are 16-byte
 *      aligned, movdqu otherwise.
 *   2. MMX, 4 elements (16 bytes) per iteration, if mmx() holds and
 *      a.length >= 4.
 *   3. General-purpose registers, 2 elements (8 bytes) per iteration, if
 *      a.length >= 2.
 * Each loop stops at the largest multiple of its step that fits in
 * a.length and writes the advanced pointers back, so the scalar loop at
 * the end only has to finish the remaining elements.
 *
 * Params:
 *      a     = destination slice; must have the same length as b and
 *              satisfy disjoint(a, b) (both checked by the in-contract)
 *      value = scalar subtracted from each element of b
 *      b     = source slice
 * Returns: a
 */
T[] _arraySliceExpMinSliceAssign_i(T[] a, T value, T[] b)
in
{
    assert(a.length == b.length);
    assert(disjoint(a, b));
}
body
{
    //printf("_arraySliceExpMinSliceAssign_i()\n");
    auto dstCur = a.ptr;
    auto dstStop = dstCur + a.length;
    auto srcCur = b.ptr;

    version (D_InlineAsm_X86)
    {
        if (sse2() && a.length >= 8)
        {
            // SSE2 path; the original author measured the aligned
            // version as 400% faster.
            auto blockEnd = dstCur + (a.length & ~7);

            // movd + pshufd below replicate this into all four lanes.
            uint splat = value;

            bool misaligned =
                ((cast(uint) dstCur | cast(uint) srcCur) & 15) != 0;

            if (!misaligned)
            {
                asm // aligned case
                {
                    mov ESI, dstCur;
                    mov EDI, blockEnd;
                    mov EAX, srcCur;
                    movd XMM2, splat;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                sse2aloop:
                    add ESI, 32;
                    movdqa XMM0, [EAX];
                    movdqa XMM1, [EAX+16];
                    add EAX, 32;
                    psubd XMM0, XMM2;
                    psubd XMM1, XMM2;
                    movdqa [ESI-32], XMM0;
                    movdqa [ESI-16], XMM1;
                    cmp ESI, EDI;
                    jb sse2aloop;

                    mov dstCur, ESI;
                    mov srcCur, EAX;
                }
            }
            else
            {
                asm // unaligned case
                {
                    mov ESI, dstCur;
                    mov EDI, blockEnd;
                    mov EAX, srcCur;
                    movd XMM2, splat;
                    pshufd XMM2, XMM2, 0;

                    align 4;
                sse2uloop:
                    add ESI, 32;
                    movdqu XMM0, [EAX];
                    movdqu XMM1, [EAX+16];
                    add EAX, 32;
                    psubd XMM0, XMM2;
                    psubd XMM1, XMM2;
                    movdqu [ESI-32], XMM0;
                    movdqu [ESI-16], XMM1;
                    cmp ESI, EDI;
                    jb sse2uloop;

                    mov dstCur, ESI;
                    mov srcCur, EAX;
                }
            }
        }
        else if (mmx() && a.length >= 4)
        {
            // MMX path; the original author measured it as 315% faster.
            auto blockEnd = dstCur + (a.length & ~3);

            // Two copies of the 32-bit scalar packed into one 64-bit word.
            ulong lane = cast(uint) value;
            ulong pair = lane | (lane << 32);

            asm
            {
                mov ESI, dstCur;
                mov EDI, blockEnd;
                mov EAX, srcCur;
                movq MM2, pair;

                align 4;
            mmxloop:
                add ESI, 16;
                movq MM0, [EAX];
                movq MM1, [EAX+8];
                add EAX, 16;
                psubd MM0, MM2;
                psubd MM1, MM2;
                movq [ESI-16], MM0;
                movq [ESI-8], MM1;
                cmp ESI, EDI;
                jb mmxloop;

                emms;
                mov dstCur, ESI;
                mov srcCur, EAX;
            }
        }
        else if (a.length >= 2)
        {
            // General-purpose register path, two elements per iteration.
            auto blockEnd = dstCur + (a.length & ~1);

            asm
            {
                mov ESI, dstCur;
                mov EDI, blockEnd;
                mov EAX, srcCur;
                mov EDX, value;

                align 4;
            x86loop:
                add ESI, 8;
                mov EBX, [EAX];
                mov ECX, [EAX+4];
                add EAX, 8;
                sub EBX, EDX;
                sub ECX, EDX;
                mov [ESI-8], EBX;
                mov [ESI-4], ECX;
                cmp ESI, EDI;
                jb x86loop;

                mov dstCur, ESI;
                mov srcCur, EAX;
            }
        }
    }

    // Scalar tail: finishes whatever the block loops did not cover (or the
    // whole array when no asm path ran).
    for (; dstCur < dstStop; ++dstCur, ++srcCur)
        *dstCur = *srcCur - value;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
957 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Checks `c[] = a[] - 6` element by element for every value of the
// module-level `cpuid` counter below CPUID_MAX (presumably this selects
// which code path sse2()/mmx() report -- confirm against their definitions).
// Each cpuid value is tried twice: with slices starting at the allocation
// (shift == 0) and with slices starting one element in (shift == 1), so the
// second pass runs on misaligned pointers.
unittest
{
    printf("_arraySliceExpMinSliceAssign_i unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;

            // Allocate dim + shift elements, then drop the first `shift`
            // so each slice always holds exactly dim elements.
            T[] a = (new T[dim + shift])[shift .. dim + shift];
            T[] b = (new T[dim + shift])[shift .. dim + shift];
            T[] c = (new T[dim + shift])[shift .. dim + shift];

            for (int idx = 0; idx < dim; idx++)
            {
                a[idx] = cast(T)idx;
                b[idx] = cast(T)(idx + 7);
                c[idx] = cast(T)(idx * 2);
            }

            c[] = a[] - 6;

            for (int idx = 0; idx < dim; idx++)
            {
                if (c[idx] == cast(T)(a[idx] - 6))
                    continue;

                // Report the first mismatch, then fail.
                printf("[%d]: %d != %d - 6\n", idx, c[idx], a[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
995 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
996 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
997 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
998 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] = value - b[]
 *
 * Entry point carrying the 'w' suffix (per the module header: 'w' is dchar,
 * 'i' is int, 'k' is uint).  It takes the same parameter types as the 'i'
 * variant and does no work of its own.
 *
 * Params:
 *      a     = destination slice
 *      b     = slice whose elements are subtracted from value
 *      value = scalar minuend
 *
 * Returns:
 *      whatever _arrayExpSliceMinSliceAssign_i returns for the same arguments
 */

T[] _arrayExpSliceMinSliceAssign_w(T[] a, T[] b, T value)
{
    // Pure forwarder: all checking and computation live in the 'i' variant.
    return _arrayExpSliceMinSliceAssign_i(a, b, value);
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1008 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] = value - b[]
 *
 * Entry point carrying the 'k' suffix (uint, per the module header).
 * Forwards unchanged to the 'i' variant.
 *
 * Params:
 *      a     = destination slice
 *      b     = slice whose elements are subtracted from value
 *      value = scalar minuend
 *
 * Returns:
 *      whatever _arrayExpSliceMinSliceAssign_i returns for the same arguments
 */
T[] _arrayExpSliceMinSliceAssign_k(T[] a, T[] b, T value)
{
    // Pure forwarder: all checking and computation live in the 'i' variant.
    return _arrayExpSliceMinSliceAssign_i(a, b, value);
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1013 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] = value - b[]
 *
 * Main implementation behind the 'w' and 'k' forwarders.  When built with
 * D_InlineAsm_X86 the bulk of the array is processed with SSE2 (8 elements
 * per iteration) or, failing that, MMX (4 elements per iteration); any
 * remaining elements are handled by a plain scalar loop.
 *
 * Params:
 *      a     = destination slice; must have the same length as b
 *      b     = slice whose elements are subtracted from value; must be
 *              disjoint from a
 *      value = scalar minuend
 *
 * Returns:
 *      a
 */
T[] _arrayExpSliceMinSliceAssign_i(T[] a, T[] b, T value)
in
{
    assert(a.length == b.length);
    assert(disjoint(a, b));
}
body
{
    //printf("_arrayExpSliceMinSliceAssign_i()\n");
    auto aptr = a.ptr;
    auto bptr = b.ptr;
    auto aend = aptr + a.length;

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 1812% faster
        if (sse2() && a.length >= 8)
        {
            // End of the region covered by whole 8-element iterations.
            auto vecEnd = aptr + (a.length & ~7);

            // Scalar copy of value; broadcast to all four lanes by pshufd.
            uint splat = value;

            if (((cast(uint) aptr | cast(uint) bptr) & 15) == 0)
            {
                asm // aligned case
                {
                    mov ESI, aptr;
                    mov EDI, vecEnd;
                    mov EAX, bptr;
                    movd XMM4, splat;
                    pshufd XMM4, XMM4, 0;

                    align 4;
                sse2AlignedLoop:
                    add ESI, 32;
                    movdqa XMM2, [EAX];
                    movdqa XMM3, [EAX+16];
                    movdqa XMM0, XMM4;
                    movdqa XMM1, XMM4;
                    add EAX, 32;
                    psubd XMM0, XMM2;
                    psubd XMM1, XMM3;
                    movdqa [ESI-32], XMM0;
                    movdqa [ESI-16], XMM1;
                    cmp ESI, EDI;
                    jb sse2AlignedLoop;

                    // Publish the advanced pointers for the scalar tail.
                    mov aptr, ESI;
                    mov bptr, EAX;
                }
            }
            else
            {
                asm // unaligned case
                {
                    mov ESI, aptr;
                    mov EDI, vecEnd;
                    mov EAX, bptr;
                    movd XMM4, splat;
                    pshufd XMM4, XMM4, 0;

                    align 4;
                sse2UnalignedLoop:
                    add ESI, 32;
                    movdqu XMM2, [EAX];
                    movdqu XMM3, [EAX+16];
                    movdqa XMM0, XMM4;
                    movdqa XMM1, XMM4;
                    add EAX, 32;
                    psubd XMM0, XMM2;
                    psubd XMM1, XMM3;
                    movdqu [ESI-32], XMM0;
                    movdqu [ESI-16], XMM1;
                    cmp ESI, EDI;
                    jb sse2UnalignedLoop;

                    // Publish the advanced pointers for the scalar tail.
                    mov aptr, ESI;
                    mov bptr, EAX;
                }
            }
        }
        // MMX version is 1077% faster
        else if (mmx() && a.length >= 4)
        {
            // End of the region covered by whole 4-element iterations.
            auto vecEnd = aptr + (a.length & ~3);

            // value replicated into both 32-bit halves of a 64-bit word.
            uint lo = cast(uint) value;
            ulong pair = (cast(ulong) lo << 32) | lo;

            asm
            {
                mov ESI, aptr;
                mov EDI, vecEnd;
                mov EAX, bptr;
                movq MM4, pair;

                align 4;
            mmxLoop:
                add ESI, 16;
                movq MM2, [EAX];
                movq MM3, [EAX+8];
                movq MM0, MM4;
                movq MM1, MM4;
                add EAX, 16;
                psubd MM0, MM2;
                psubd MM1, MM3;
                movq [ESI-16], MM0;
                movq [ESI-8], MM1;
                cmp ESI, EDI;
                jb mmxLoop;

                emms;
                // Publish the advanced pointers for the scalar tail.
                mov aptr, ESI;
                mov bptr, EAX;
            }
        }
    }

    // Scalar path: handles the leftover elements after a vector loop, or the
    // whole array when no vector path was taken.
    for (; aptr < aend; ++aptr, ++bptr)
        *aptr = value - *bptr;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1137 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Exercises `c[] = 6 - a[]` for every cpuid setting, once with the slices
// starting at the beginning of their allocations and once offset by one
// element so that they are misaligned.
unittest
{
    printf("_arrayExpSliceMinSliceAssign_i unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; ++shift)
        {
            const int dim = 67;

            // Allocate one extra element when shift == 1, then drop the first
            // element so the slice no longer starts at the allocation start.
            T[] src = new T[dim + shift];   // aligned on 16 byte boundary
            src = src[shift .. dim + shift]; // misalign for second iteration
            T[] other = new T[dim + shift];
            other = other[shift .. dim + shift];
            T[] dst = new T[dim + shift];
            dst = dst[shift .. dim + shift];

            for (int idx = 0; idx < dim; ++idx)
            {
                src[idx] = cast(T)idx;
                other[idx] = cast(T)(idx + 7);
                dst[idx] = cast(T)(idx * 2);
            }

            dst[] = 6 - src[];

            for (int idx = 0; idx < dim; ++idx)
            {
                if (dst[idx] == cast(T)(6 - src[idx]))
                    continue;

                // Report the first mismatching element, then fail.
                printf("[%d]: %d != 6 - %d\n", idx, dst[idx], src[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1175 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1176 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1177 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1178 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] = b[] - c[]
 *
 * Entry point carrying the 'w' suffix (per the module header: 'w' is dchar,
 * 'i' is int, 'k' is uint).  Forwards unchanged to the 'i' variant; note that
 * the parameter order is (a, c, b).
 *
 * Params:
 *      a = destination slice
 *      c = second source slice
 *      b = first source slice
 *
 * Returns:
 *      whatever _arraySliceSliceMinSliceAssign_i returns for the same arguments
 */

T[] _arraySliceSliceMinSliceAssign_w(T[] a, T[] c, T[] b)
{
    // Pure forwarder: all checking and computation live in the 'i' variant.
    return _arraySliceSliceMinSliceAssign_i(a, c, b);
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1188 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Computes:
 *      a[] = b[] - c[]
 *
 * Entry point carrying the 'k' suffix (uint, per the module header).
 * Forwards unchanged to the 'i' variant; note that the parameter order is
 * (a, c, b).
 *
 * Params:
 *      a = destination slice
 *      c = second source slice
 *      b = first source slice
 *
 * Returns:
 *      whatever _arraySliceSliceMinSliceAssign_i returns for the same arguments
 */
T[] _arraySliceSliceMinSliceAssign_k(T[] a, T[] c, T[] b)
{
    // Pure forwarder: all checking and computation live in the 'i' variant.
    return _arraySliceSliceMinSliceAssign_i(a, c, b);
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1193 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Element-wise slice subtraction:
 *      a[] = b[] - c[]
 *
 * Params:
 *      a = destination slice; receives the differences
 *      c = slice whose elements are subtracted (second argument)
 *      b = slice whose elements are subtracted from (third argument)
 *
 * Returns:
 *      a
 *
 * All three slices must have the same length and must not overlap;
 * both conditions are checked by the in-contract.
 */
T[] _arraySliceSliceMinSliceAssign_i(T[] a, T[] c, T[] b)
in
{
    assert(a.length == b.length);
    assert(b.length == c.length);
    assert(disjoint(a, b));
    assert(disjoint(a, c));
    assert(disjoint(b, c));
}
body
{
    T* dstCur  = a.ptr;
    T* dstStop = dstCur + a.length;
    T* minCur  = b.ptr;     // elements being subtracted from
    T* subCur  = c.ptr;     // elements being subtracted

    version (D_InlineAsm_X86)
    {
        // SSE2 aligned version is 1721% faster
        if (sse2() && a.length >= 8)
        {
            // The vector loops advance 32 bytes per pass, so they stop at
            // the length rounded down to a multiple of 8 elements.
            T* simdStop = dstCur + (a.length & ~7);

            // movdqa requires every operand address to be 16-byte aligned.
            bool allAligned =
                ((cast(uint) dstCur | cast(uint) minCur | cast(uint) subCur) & 15) == 0;

            if (allAligned)
            {
                asm // aligned case
                {
                    mov ESI, dstCur;
                    mov EDI, simdStop;
                    mov EAX, minCur;
                    mov ECX, subCur;

                    align 4;
                loopSse2Aligned:
                    add ESI, 32;
                    movdqa XMM0, [EAX];
                    movdqa XMM2, [ECX];
                    movdqa XMM1, [EAX+16];
                    movdqa XMM3, [ECX+16];
                    add EAX, 32;
                    add ECX, 32;
                    psubd XMM0, XMM2;
                    psubd XMM1, XMM3;
                    movdqa [ESI   -32], XMM0;
                    movdqa [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb loopSse2Aligned;

                    // hand the advanced cursors back for the scalar tail
                    mov dstCur, ESI;
                    mov minCur, EAX;
                    mov subCur, ECX;
                }
            }
            else
            {
                asm // unaligned case
                {
                    mov ESI, dstCur;
                    mov EDI, simdStop;
                    mov EAX, minCur;
                    mov ECX, subCur;

                    align 4;
                loopSse2Unaligned:
                    add ESI, 32;
                    movdqu XMM0, [EAX];
                    movdqu XMM2, [ECX];
                    movdqu XMM1, [EAX+16];
                    movdqu XMM3, [ECX+16];
                    add EAX, 32;
                    add ECX, 32;
                    psubd XMM0, XMM2;
                    psubd XMM1, XMM3;
                    movdqu [ESI   -32], XMM0;
                    movdqu [ESI+16-32], XMM1;
                    cmp ESI, EDI;
                    jb loopSse2Unaligned;

                    // hand the advanced cursors back for the scalar tail
                    mov dstCur, ESI;
                    mov minCur, EAX;
                    mov subCur, ECX;
                }
            }
        }
        // MMX version is 1002% faster
        else if (mmx() && a.length >= 4)
        {
            // The MMX loop advances 16 bytes per pass, so it stops at the
            // length rounded down to a multiple of 4 elements.
            T* simdStop = dstCur + (a.length & ~3);

            asm
            {
                mov ESI, dstCur;
                mov EDI, simdStop;
                mov EAX, minCur;
                mov ECX, subCur;

                align 4;
            loopMmx:
                add ESI, 16;
                movq MM0, [EAX];
                movq MM2, [ECX];
                movq MM1, [EAX+8];
                movq MM3, [ECX+8];
                add EAX, 16;
                add ECX, 16;
                psubd MM0, MM2;
                psubd MM1, MM3;
                movq [ESI  -16], MM0;
                movq [ESI+8-16], MM1;
                cmp ESI, EDI;
                jb loopMmx;

                emms;
                // hand the advanced cursors back for the scalar tail
                mov dstCur, ESI;
                mov minCur, EAX;
                mov subCur, ECX;
            }
        }
    }

    // Scalar loop: finishes whatever the vector paths left over, or does
    // all the work when no vector path was taken.
    for (; dstCur < dstStop; ++dstCur, ++minCur, ++subCur)
        *dstCur = *minCur - *subCur;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1319 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// Exercises the slice-minus-slice array operation once per simulated
// cpuid setting, for a buffer taken from the start of its allocation and
// for one offset by a single element.
unittest
{
    printf("_arraySliceSliceMinSliceAssign_i unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; shift++)
        {
            const int dim = 67;

            // Fresh allocations are aligned on a 16 byte boundary; dropping
            // the first `shift` elements misaligns them on the second pass.
            T[] a = (new T[dim + shift])[shift .. dim + shift];
            T[] b = (new T[dim + shift])[shift .. dim + shift];
            T[] c = (new T[dim + shift])[shift .. dim + shift];

            int k = 0;
            while (k < dim)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
                k++;
            }

            // the array operation under test
            c[] = a[] - b[];

            for (int idx = 0; idx < dim; idx++)
            {
                if (c[idx] == cast(T)(a[idx] - b[idx]))
                    continue;

                printf("[%d]: %d != %d - %d\n", idx, c[idx], a[idx], b[idx]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1357 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1358 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1359 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1360 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1361 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1362 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1363 * a[] -= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1364 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1365 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 'w'-suffixed entry point for a[] -= value; the work is done by the
// 'i' variant, whose result is passed straight back to the caller.
T[] _arrayExpSliceMinass_w(T[] a, T value)
{
    T[] result = _arrayExpSliceMinass_i(a, value);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1370 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 'k'-suffixed entry point for a[] -= value; the work is done by the
// 'i' variant, whose result is passed straight back to the caller.
T[] _arrayExpSliceMinass_k(T[] a, T value)
{
    T[] result = _arrayExpSliceMinass_i(a, value);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1375 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1376 T[] _arrayExpSliceMinass_i(T[] a, T value) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1377 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1378 //printf("_arrayExpSliceMinass_i(a.length = %d, value = %Lg)\n", a.length, cast(real)value); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1379 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1380 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1381 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1382 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1383 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1384 // SSE2 aligned version is 81% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1385 if (sse2() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1386 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1387 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1388 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1389 uint l = value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1390 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1391 if (((cast(uint) aptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1392 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1393 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1394 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1395 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1396 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1397 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1398 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1399 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1400 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1401 startaddsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1402 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1403 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1404 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1405 psubd XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1406 psubd XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1407 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1408 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1409 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1410 jb startaddsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1411 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1412 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1413 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1414 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1415 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1416 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1417 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1418 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1419 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1420 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1421 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1422 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1423 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1424 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1425 startaddsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1426 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1427 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1428 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1429 psubd XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1430 psubd XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1431 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1432 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1433 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1434 jb startaddsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1435 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1436 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1437 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1438 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1439 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1440 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1441 // MMX version is 81% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1442 if (mmx() && a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1443 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1444 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1445 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1446 ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1447 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1448 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1449 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1450 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1451 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1452 movq MM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1453 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1454 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1455 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1456 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1457 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1458 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1459 psubd MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1460 psubd MM1, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1461 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1462 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1463 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1464 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1465 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1466 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1467 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1468 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1469 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1470 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1471 if (a.length >= 2) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1472 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1473 auto n = aptr + (a.length & ~1); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1474 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1475 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1476 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1477 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1478 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1479 mov EDX, value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1480 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1481 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1482 start386: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1483 mov EBX, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1484 mov ECX, [ESI+4]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1485 add ESI, 8; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1486 sub EBX, EDX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1487 sub ECX, EDX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1488 mov [ESI -8], EBX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1489 mov [ESI+4-8], ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1490 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1491 jb start386; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1492 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1493 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1494 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1495 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1496 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1497 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1498 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1499 *aptr++ -= value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1500 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1501 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1502 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1503 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1504 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1505 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1506 printf("_arrayExpSliceMinass_i unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1507 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1508 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1509 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1510 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1511 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1512 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1513 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1514 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1515 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1516 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1517 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1518 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1519 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1520 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1521 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1522 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1523 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1524 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1525 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1526 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1527 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1528 a[] = c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1529 a[] -= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1530 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1531 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1532 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1533 if (a[i] != cast(T)(c[i] - 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1534 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1535 printf("[%d]: %d != %d - 6\n", i, a[i], c[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1536 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1537 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1538 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1539 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1540 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1541 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1542 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1543 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1544 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1545 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1546 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1547 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1548 * a[] -= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1549 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1550 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1551 T[] _arraySliceSliceMinass_w(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1552 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1553 return _arraySliceSliceMinass_i(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1554 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1555 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1556 T[] _arraySliceSliceMinass_k(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1557 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1558 return _arraySliceSliceMinass_i(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1559 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1560 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1561 T[] _arraySliceSliceMinass_i(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1562 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1563 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1564 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1565 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1566 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1567 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1568 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1569 //printf("_arraySliceSliceMinass_i()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1570 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1571 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1572 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1573 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1574 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1575 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1576 // SSE2 aligned version is 731% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1577 if (sse2() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1578 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1579 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1580 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1581 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1582 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1583 asm // unaligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1584 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1585 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1586 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1587 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1588 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1589 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1590 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1591 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1592 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1593 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1594 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1595 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1596 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1597 psubd XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1598 psubd XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1599 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1600 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1601 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1602 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1603 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1604 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1605 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1606 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1607 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1608 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1609 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1610 asm // aligned case |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1611 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1612 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1613 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1614 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1615 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1616 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1617 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1618 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1619 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1620 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1621 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1622 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1623 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1624 psubd XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1625 psubd XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1626 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1627 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1628 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1629 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1630 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1631 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1632 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1633 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1634 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1635 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1636 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1637 // MMX version is 441% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1638 if (mmx() && a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1639 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1640 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1641 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1642 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1643 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1644 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1645 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1646 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1647 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1648 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1649 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1650 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1651 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1652 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1653 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1654 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1655 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1656 psubd MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1657 psubd MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1658 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1659 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1660 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1661 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1662 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1663 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1664 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1665 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1666 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1667 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1668 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1669 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1670 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1671 *aptr++ -= *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1672 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1673 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1674 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1675 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* Unit test for in-place element-wise slice subtraction (c[] -= a[]).
 *
 * For each value of the externally declared `cpuid` counter, and for two
 * slice start offsets (0 and 1 element into a fresh allocation), it fills
 * three equally sized slices, saves c into b, performs c[] -= a[], and then
 * verifies every element against a scalar b[i] - a[i].
 */
unittest
{
    printf("_arraySliceSliceMinass_i unittest\n");

    // NOTE(review): cpuid / CPUID_MAX are declared outside this block;
    // presumably each value selects a different CPU-feature code path in the
    // array operation -- confirm at their declaration.
    cpuid = 0;
    while (cpuid < CPUID_MAX)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int shift = 0; shift < 2; ++shift)
        {
            const int dim = 67;

            // Allocate dim + shift elements and drop the first `shift` of
            // them: pass 0 uses the allocation start (aligned on a 16 byte
            // boundary), pass 1 starts one element in (misaligned).
            T[] a = (new T[dim + shift])[shift .. dim + shift];
            T[] b = (new T[dim + shift])[shift .. dim + shift];
            T[] c = (new T[dim + shift])[shift .. dim + shift];

            for (int k = 0; k < dim; ++k)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            b[] = c[];      // remember c's values from before the subtraction
            c[] -= a[];     // operation under test

            for (int k = 0; k < dim; ++k)
            {
                const T expected = cast(T)(b[k] - a[k]);
                if (c[k] == expected)
                    continue;

                // Report the first mismatching element, then fail the test.
                printf("[%d]: %d != %d - %d\n", k, c[k], b[k], a[k]);
                assert(0);
            }
        }

        cpuid++;
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1714 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1715 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1716 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1717 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1718 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1719 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1720 * a[] = b[] * value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1721 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1722 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 'w'-suffixed entry point for a[] = b[] * value (per the module header the
// 'w' suffix corresponds to dchar). It performs no work of its own: the call
// is forwarded unchanged to _arraySliceExpMulSliceAssign_i and that
// function's result is handed back to the caller.
T[] _arraySliceExpMulSliceAssign_w(T[] a, T value, T[] b)
{
    T[] result = _arraySliceExpMulSliceAssign_i(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1727 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
// 'k'-suffixed entry point for a[] = b[] * value (per the module header the
// 'k' suffix corresponds to uint). It performs no work of its own: the call
// is forwarded unchanged to _arraySliceExpMulSliceAssign_i and that
// function's result is handed back to the caller.
T[] _arraySliceExpMulSliceAssign_k(T[] a, T value, T[] b)
{
    T[] result = _arraySliceExpMulSliceAssign_i(a, value, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1732 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1733 T[] _arraySliceExpMulSliceAssign_i(T[] a, T value, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1734 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1735 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1736 assert(a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1737 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1738 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1739 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1740 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1741 //printf("_arraySliceExpMulSliceAssign_i()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1742 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1743 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1744 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1745 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1746 version (none) // multiplying a pair is not supported by MMX |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1747 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1748 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1749 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1750 // SSE2 aligned version is 1380% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1751 if (sse2() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1752 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1753 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1754 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1755 uint l = value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1756 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1757 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1758 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1759 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1760 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1761 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1762 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1763 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1764 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1765 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1766 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1767 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1768 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1769 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1770 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1771 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1772 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1773 pmuludq XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1774 pmuludq XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1775 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1776 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1777 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1778 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1779 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1780 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1781 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1782 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1783 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1784 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1785 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1786 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1787 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1788 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1789 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1790 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1791 movd XMM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1792 pshufd XMM2, XMM2, 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1793 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1794 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1795 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1796 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1797 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1798 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1799 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1800 pmuludq XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1801 pmuludq XMM1, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1802 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1803 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1804 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1805 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1806 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1807 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1808 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1809 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1810 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1811 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1812 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1813 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1814 // MMX version is 1380% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1815 if (mmx() && a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1816 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1817 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1818 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1819 ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1820 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1821 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1822 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1823 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1824 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1825 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1826 movq MM2, l; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1827 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1828 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1829 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1830 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1831 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1832 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1833 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1834 pmuludq MM0, MM2; // only multiplies low 32 bits |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1835 pmuludq MM1, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1836 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1837 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1838 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1839 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1840 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1841 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1842 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1843 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1844 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1845 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1846 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1847 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1848 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1849 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1850 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1851 *aptr++ = *bptr++ * value; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1852 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1853 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1854 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1855 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* Checks the `c[] = a[] * 6` array operation on element type T.
 *
 * The test is repeated for every value of the module-level `cpuid` below
 * CPUID_MAX (presumably this selects which SIMD code path the runtime routine
 * takes -- confirm against the definitions of sse2()/mmx()), and for each of
 * those once with slices that start at the allocation and once with slices
 * offset by one element so the data is no longer 16-byte aligned.
 */
unittest
{
    // This module implements only the 'w', 'i' and 'k' variants, so the
    // banner names the '_i' routine (it previously said '_s').
    printf("_arraySliceExpMulSliceAssign_i unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int j = 0; j < 2; j++)
        {
            const int dim = 67;
            T[] a = new T[dim + j];     // aligned on 16 byte boundary
            a = a[j .. dim + j];        // misalign for second iteration
            T[] b = new T[dim + j];     // filled below but not read by this check
            b = b[j .. dim + j];
            T[] c = new T[dim + j];
            c = c[j .. dim + j];

            // Seed the operands; c gets values that differ from the expected
            // result so a no-op implementation would be detected.
            for (int i = 0; i < dim; i++)
            {   a[i] = cast(T)i;
                b[i] = cast(T)(i + 7);
                c[i] = cast(T)(i * 2);
            }

            c[] = a[] * 6;

            // Compare every element against the scalar computation.
            for (int i = 0; i < dim; i++)
            {
                //printf("[%d]: %d ?= %d * 6\n", i, c[i], a[i]);
                if (c[i] != cast(T)(a[i] * 6))
                {
                    printf("[%d]: %d != %d * 6\n", i, c[i], a[i]);
                    assert(0);
                }
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1894 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1895 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1896 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1897 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1898 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1899 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1900 * a[] = b[] * c[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1901 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1902 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* 'w'-suffixed entry point for a[] = b[] * c[].
 *
 * Passes its arguments, in the same order, to
 * _arraySliceSliceMulSliceAssign_i and returns whatever that returns.
 */
T[] _arraySliceSliceMulSliceAssign_w(T[] a, T[] c, T[] b)
{
    auto result = _arraySliceSliceMulSliceAssign_i(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1907 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/* 'k'-suffixed entry point for a[] = b[] * c[].
 *
 * Passes its arguments, in the same order, to
 * _arraySliceSliceMulSliceAssign_i and returns whatever that returns.
 */
T[] _arraySliceSliceMulSliceAssign_k(T[] a, T[] c, T[] b)
{
    auto result = _arraySliceSliceMulSliceAssign_i(a, c, b);
    return result;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1912 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1913 T[] _arraySliceSliceMulSliceAssign_i(T[] a, T[] c, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1914 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1915 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1916 assert(a.length == b.length && b.length == c.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1917 assert(disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1918 assert(disjoint(a, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1919 assert(disjoint(b, c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1920 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1921 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1922 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1923 //printf("_arraySliceSliceMulSliceAssign_i()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1924 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1925 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1926 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1927 auto cptr = c.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1928 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1929 version (none) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1930 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1931 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1932 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1933 // SSE2 aligned version is 1407% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1934 if (sse2() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1935 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1936 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1937 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1938 if (((cast(uint) aptr | cast(uint) bptr | cast(uint) cptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1939 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1940 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1941 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1942 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1943 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1944 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1945 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1946 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1947 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1948 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1949 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1950 movdqu XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1951 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1952 movdqu XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1953 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1954 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1955 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1956 pmuludq XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1957 pmuludq XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1958 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1959 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1960 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1961 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1962 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1963 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1964 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1965 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1966 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1967 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1968 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1969 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1970 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1971 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1972 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1973 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1974 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1975 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1976 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1977 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1978 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1979 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1980 movdqa XMM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1981 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1982 movdqa XMM1, [EAX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1983 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1984 add EAX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1985 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1986 pmuludq XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1987 pmuludq XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1988 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1989 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1990 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1991 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1992 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1993 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1994 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1995 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1996 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1997 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1998 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1999 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2000 // MMX version is 1029% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2001 if (mmx() && a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2002 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2003 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2004 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2005 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2006 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2007 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2008 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2009 mov EAX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2010 mov ECX, cptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2011 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2012 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2013 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2014 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2015 movq MM0, [EAX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2016 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2017 movq MM1, [EAX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2018 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2019 add EAX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2020 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2021 pmuludq MM0, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2022 pmuludq MM1, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2023 movq [ESI -16], MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2024 movq [ESI+8-16], MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2025 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2026 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2027 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2028 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2029 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2030 mov bptr, EAX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2031 mov cptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2032 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2033 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2034 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2035 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2036 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2037 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2038 *aptr++ = *bptr++ * *cptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2039 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2040 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2041 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2042 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
unittest
{
    printf("_arraySliceSliceMulSliceAssign_i unittest\n");

    // Run the complete check once for each value of the module-level cpuid
    // counter.
    // NOTE(review): presumably cpuid steers which implementation path
    // (SSE2 / MMX / scalar) is taken -- confirm against mmx()/sse2().
    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // skew == 0: slices begin at the start of their allocation.
        // skew == 1: slices begin one element in, to misalign them.
        for (int skew = 0; skew < 2; skew++)
        {
            const int dim = 67;
            const int last = dim + skew;

            // Allocate one spare element when skewing so that each slice
            // still holds exactly dim elements.
            T[] a = new T[last];
            a = a[skew .. last];
            T[] b = new T[last];
            b = b[skew .. last];
            T[] c = new T[last];
            c = c[skew .. last];

            // Seed the inputs; c gets a throw-away pattern that the
            // array operation below overwrites.
            for (int k = 0; k < dim; ++k)
            {
                a[k] = cast(T)k;
                b[k] = cast(T)(k + 7);
                c[k] = cast(T)(k * 2);
            }

            // Operation under test: element-wise product of two slices.
            c[] = a[] * b[];

            // Compare each element against a plain scalar multiply.
            for (int k = 0; k < dim; ++k)
            {
                if (c[k] == cast(T)(a[k] * b[k]))
                    continue;

                printf("[%d]: %d != %d * %d\n", k, c[k], a[k], b[k]);
                assert(0);
            }
        }
    }
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2080 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2081 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2082 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2083 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2084 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2085 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2086 * a[] *= value |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2087 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2088 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'w'-suffix entry point for a[] *= value; it simply forwards to the
/// 'i' implementation and hands back whatever that returns.
T[] _arrayExpSliceMulass_w(T[] a, T value)
{
    T[] scaled = _arrayExpSliceMulass_i(a, value);
    return scaled;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2093 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/// 'k'-suffix entry point for a[] *= value; it simply forwards to the
/// 'i' implementation and hands back whatever that returns.
T[] _arrayExpSliceMulass_k(T[] a, T value)
{
    T[] scaled = _arrayExpSliceMulass_i(a, value);
    return scaled;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2098 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/***********************
 * Multiplies every element of a[] by value, in place.
 *
 * Params:
 *      a     = slice whose elements are overwritten with the products
 *      value = multiplier applied to each element
 * Returns:
 *      the slice a that was passed in
 */
T[] _arrayExpSliceMulass_i(T[] a, T value)
{
    //printf("_arrayExpSliceMulass_i(a.length = %d, value = %Lg)\n", a.length, cast(real)value);
    auto aptr = a.ptr;
    auto aend = aptr + a.length;

    // Everything inside version (none) is compiled out, so only the scalar
    // loop at the bottom of this function actually runs.
    // NOTE(review): pmuludq is a widening multiply of the even-numbered
    // 32-bit lanes only, which is presumably why these paths were switched
    // off -- confirm before re-enabling.
    version (none)
    {
        version (D_InlineAsm_X86)
        {
            // SSE2 aligned version is 400% faster
            if (sse2() && a.length >= 8)
            {
                // End of the part of a[] that is a whole number of
                // 8-element (32-byte) chunks; the asm loops stop there.
                auto n = aptr + (a.length & ~7);

                uint l = value;

                if (((cast(uint) aptr) & 15) != 0)
                {
                    // Start of a[] is not 16-byte aligned: use movdqu.
                    asm
                    {
                        mov ESI, aptr;
                        mov EDI, n;
                        movd XMM2, l;
                        pshufd XMM2, XMM2, 0;   // copy value into all four lanes

                        align 4;
                    startsse2u:
                        movdqu XMM0, [ESI];
                        movdqu XMM1, [ESI+16];
                        add ESI, 32;
                        pmuludq XMM0, XMM2;
                        pmuludq XMM1, XMM2;
                        movdqu [ESI -32], XMM0;
                        movdqu [ESI+16-32], XMM1;
                        cmp ESI, EDI;
                        jb startsse2u;

                        mov aptr, ESI;          // scalar loop resumes from here
                    }
                }
                else
                {
                    // Start of a[] is 16-byte aligned: use movdqa.
                    asm
                    {
                        mov ESI, aptr;
                        mov EDI, n;
                        movd XMM2, l;
                        pshufd XMM2, XMM2, 0;   // copy value into all four lanes

                        align 4;
                    startsse2a:
                        movdqa XMM0, [ESI];
                        movdqa XMM1, [ESI+16];
                        add ESI, 32;
                        pmuludq XMM0, XMM2;
                        pmuludq XMM1, XMM2;
                        movdqa [ESI -32], XMM0;
                        movdqa [ESI+16-32], XMM1;
                        cmp ESI, EDI;
                        jb startsse2a;

                        mov aptr, ESI;          // scalar loop resumes from here
                    }
                }
            }
            else
            // MMX version is 402% faster
            if (mmx() && a.length >= 4)
            {
                // End of the part of a[] that is a whole number of
                // 4-element (16-byte) chunks.
                auto n = aptr + (a.length & ~3);

                // value duplicated into both 32-bit halves of a 64-bit word
                ulong l = cast(uint) value | (cast(ulong)cast(uint) value << 32);

                asm
                {
                    mov ESI, aptr;
                    mov EDI, n;
                    movq MM2, l;

                    align 4;
                startmmx:
                    movq MM0, [ESI];
                    movq MM1, [ESI+8];
                    add ESI, 16;
                    pmuludq MM0, MM2;
                    pmuludq MM1, MM2;
                    movq [ESI -16], MM0;
                    movq [ESI+8-16], MM1;
                    cmp ESI, EDI;
                    jb startmmx;

                    emms;
                    mov aptr, ESI;              // scalar loop resumes from here
                }
            }
        }
    }

    // Scalar path: handles every element from aptr up to the end of a[].
    for (; aptr < aend; ++aptr)
        *aptr *= value;

    return a;
}
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2203 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2204 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2205 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2206 printf("_arrayExpSliceMulass_i unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2207 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2208 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2209 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2210 version (log) printf(" cpuid %d\n", cpuid); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2211 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2212 for (int j = 0; j < 2; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2213 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2214 const int dim = 67; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2215 T[] a = new T[dim + j]; // aligned on 16 byte boundary |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2216 a = a[j .. dim + j]; // misalign for second iteration |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2217 T[] b = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2218 b = b[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2219 T[] c = new T[dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2220 c = c[j .. dim + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2221 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2222 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2223 { a[i] = cast(T)i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2224 b[i] = cast(T)(i + 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2225 c[i] = cast(T)(i * 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2226 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2227 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2228 b[] = a[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2229 a[] *= 6; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2230 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2231 for (int i = 0; i < dim; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2232 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2233 if (a[i] != cast(T)(b[i] * 6)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2234 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2235 printf("[%d]: %d != %d * 6\n", i, a[i], b[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2236 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2237 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2238 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2239 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2240 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2241 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2242 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2243 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2244 /* ======================================================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2245 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2246 /*********************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2247 * Computes: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2248 * a[] *= b[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2249 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2250 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2251 T[] _arraySliceSliceMulass_w(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2252 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2253 return _arraySliceSliceMulass_i(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2254 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2255 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2256 T[] _arraySliceSliceMulass_k(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2257 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2258 return _arraySliceSliceMulass_i(a, b); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2259 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2260 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2261 T[] _arraySliceSliceMulass_i(T[] a, T[] b) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2262 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2263 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2264 assert (a.length == b.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2265 assert (disjoint(a, b)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2266 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2267 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2268 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2269 //printf("_arraySliceSliceMulass_i()\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2270 auto aptr = a.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2271 auto aend = aptr + a.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2272 auto bptr = b.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2273 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2274 version (none) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2275 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2276 version (D_InlineAsm_X86) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2277 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2278 // SSE2 aligned version is 873% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2279 if (sse2() && a.length >= 8) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2280 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2281 auto n = aptr + (a.length & ~7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2282 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2283 if (((cast(uint) aptr | cast(uint) bptr) & 15) != 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2284 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2285 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2286 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2287 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2288 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2289 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2290 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2291 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2292 startsse2u: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2293 movdqu XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2294 movdqu XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2295 movdqu XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2296 movdqu XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2297 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2298 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2299 pmuludq XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2300 pmuludq XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2301 movdqu [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2302 movdqu [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2303 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2304 jb startsse2u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2305 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2306 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2307 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2308 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2309 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2310 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2311 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2312 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2313 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2314 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2315 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2316 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2317 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2318 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2319 startsse2a: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2320 movdqa XMM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2321 movdqa XMM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2322 movdqa XMM1, [ESI+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2323 movdqa XMM3, [ECX+16]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2324 add ESI, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2325 add ECX, 32; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2326 pmuludq XMM0, XMM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2327 pmuludq XMM1, XMM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2328 movdqa [ESI -32], XMM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2329 movdqa [ESI+16-32], XMM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2330 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2331 jb startsse2a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2332 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2333 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2334 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2335 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2336 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2337 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2338 /+ BUG: comment out this section until we figure out what is going |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2339 wrong with the invalid pshufd instructions (pshufd is an SSE2 instruction that accepts only XMM operands, not the MM registers used below). |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2340 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2341 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2342 // MMX version is 573% faster |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2343 if (mmx() && a.length >= 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2344 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2345 auto n = aptr + (a.length & ~3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2346 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2347 asm |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2348 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2349 mov ESI, aptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2350 mov EDI, n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2351 mov ECX, bptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2352 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2353 align 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2354 startmmx: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2355 movq MM0, [ESI]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2356 movq MM2, [ECX]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2357 movq MM1, [ESI+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2358 movq MM3, [ECX+8]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2359 pxor MM4, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2360 pxor MM5, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2361 punpckldq MM4, MM0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2362 punpckldq MM5, MM2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2363 add ESI, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2364 add ECX, 16; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2365 pmuludq MM4, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2366 pshufd MM4, MM4, 8; // ? |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2367 movq [ESI -16], MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2368 pxor MM4, MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2369 pxor MM5, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2370 punpckldq MM4, MM1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2371 punpckldq MM5, MM3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2372 pmuludq MM4, MM5; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2373 pshufd MM4, MM4, 8; // ? |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2374 movq [ESI+8-16], MM4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2375 cmp ESI, EDI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2376 jb startmmx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2377 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2378 emms; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2379 mov aptr, ESI; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2380 mov bptr, ECX; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2381 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2382 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2383 +/ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2384 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2385 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2386 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2387 while (aptr < aend) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2388 *aptr++ *= *bptr++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2389 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2390 return a; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2391 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2392 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
/**
 * Unit test for the in-place element-wise multiply `a[] *= c[]`
 * (announced at runtime as _arraySliceSliceMulass_i).
 *
 * For each value of the module-level `cpuid` below CPUID_MAX, the multiply
 * is run twice on 67-element slices: once with slices that start at the
 * beginning of their allocation and once with slices that start one element
 * in.  Every resulting element is then compared against a scalar product of
 * the saved left operand and the right operand.
 */
unittest
{
    printf("_arraySliceSliceMulass_i unittest\n");

    // `cpuid` is not declared in this block; the loop steps the outer
    // variable itself through every value below CPUID_MAX.
    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int offset = 0; offset < 2; ++offset)
        {
            const int len = 67;
            const int end = len + offset;

            // offset == 0: slice starts at the allocation start (the original
            // comment calls this "aligned on 16 byte boundary").
            // offset == 1: slice starts one element in ("misalign for second
            // iteration").
            T[] product = (new T[end])[offset .. end];
            T[] saved   = (new T[end])[offset .. end];
            T[] factor  = (new T[end])[offset .. end];

            for (int k = 0; k < len; ++k)
            {
                product[k] = cast(T)k;
                saved[k]   = cast(T)(k + 7);
                factor[k]  = cast(T)(k * 2);
            }

            // Keep a copy of the left operand before it is overwritten by
            // the multiply, so the result can be checked afterwards.
            saved[] = product[];
            product[] *= factor[];

            for (int k = 0; k < len; ++k)
            {
                if (cast(T)(saved[k] * factor[k]) == product[k])
                    continue;

                printf("[%d]: %d != %d * %d\n", k, product[k], saved[k], factor[k]);
                assert(0);
            }
        }
    }
}