changeset 1290:e3d0dea394ab

partial inline asm review
author thomask
date Wed, 27 Dec 2006 11:59:48 +0000
parents ad4d84aba69b
children 5ef63ca6e8c3
files run/a/asm_pmaddwd_01_A.d run/a/asm_pmaxsw_01_A.d run/a/asm_pmaxub_01_A.d run/a/asm_pminsub_01_A.d run/a/asm_pminsw_01_A.d run/a/asm_pmovmskb_01_A.d run/a/asm_pmovmskb_01_B.d run/a/asm_pmulhrw_01_A.d run/a/asm_pmulhuw_01_A.d run/a/asm_pmulhw_01_A.d run/a/asm_pmullw_01_A.d run/a/asm_pmuludq_01_A.d
diffstat 12 files changed, 113 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/run/a/asm_pmaddwd_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmaddwd_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -17,17 +17,23 @@
 	int main(){
 		haveSSE2!()();
 
-		const short[8] A = [1, 2, 3, 4, 5, 6, 16, 1];
-		const short[8] B = [-9, 10, -11, -12, 13, 14, 0xFFF, 2];
+		short[] A = [1, 2, 3, 4, 5, 6, 16, 1];
+		short* a = A.ptr;
 
-		int[4] c;
+		short[] B = [-9, 10, -11, -12, 13, 14, 0xFFF, 2];
+		short* b = B.ptr;
+
+		int* c = (new int[4]).ptr;
 
-		asm{
-			movdqu XMM0, A;
-			movdqu XMM1, B;
-			pmaddwd XMM0, XMM1;
-			movdqu c, XMM0;
-		}
+			asm{
+				mov EAX, a; // NOTE(review): pointer is moved through 32-bit EAX with no size_t.sizeof branch - confirm this test is only built for 32-bit x86
+				movdqu XMM0, [EAX]; // XMM0 = the eight shorts of A (unaligned 16-byte load)
+				mov EAX, b;
+				movdqu XMM1, [EAX]; // XMM1 = the eight shorts of B
+				pmaddwd XMM0, XMM1; // multiply the words pairwise and add neighbouring products into four ints
+				mov EAX, c;
+				movdqu [EAX], XMM0; // write the four int sums to the buffer behind c
+			}
 
 		if(c[0] != (1 * -9) + (2 * 10)){
 			assert(0);
@@ -35,10 +41,10 @@
 		if(c[1] != (3 * -11) + (4 * -12)){
 			assert(0);
 		}
-		if(c[2] != (5 * -12) + (6 * 13)){
+		if(c[2] != (5 * 13) + (6 * 14)){
 			assert(0);
 		}
-		if(c[3] != 0xFFF02){
+		if(c[3] != 0xFFF2){
 			assert(0);
 		}
 
--- a/run/a/asm_pmaxsw_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmaxsw_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -17,9 +17,13 @@
 	int main(){
 		haveSSE!()();
 
-		short* a = [cast(short)1, 2, 3, 4, 5, 6, 16, 2];
-		short* b = [cast(short)-9, 10, -11, -12, 13, 14, 0xFFF, 1];
-		short* c = new short[8];
+		short[] A = [cast(short)1, 2, 3, 4, 5, 6, 16, 2];
+		short* a = A.ptr;
+
+		short[] B = [cast(short)-9, 10, -11, -12, 13, 14, 0xFFF, 1];
+		short* b = B.ptr;
+
+		short* c = (new short[8]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pmaxub_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmaxub_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,9 +16,13 @@
 	int main(){
 		haveSSE!()();
 
-		ubyte* a = [cast(ubyte) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-		ubyte* b = [cast(ubyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 17];
-		ubyte* c = new ubyte[16];
+		ubyte[] A = [cast(ubyte) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+		ubyte* a = A.ptr;
+
+		ubyte[] B = [cast(ubyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 17];
+		ubyte* b = B.ptr;
+
+		ubyte* c = (new ubyte[16]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pminsub_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pminsub_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,9 +16,13 @@
 	int main(){
 		haveSSE!()();
 
-		ubyte* a = [cast(ubyte)15,  2,  3,  4,  5,  6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-		ubyte* b = [cast(ubyte) 1, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1, 17];
-		ubyte* c = new ubyte[16];
+		ubyte[] A = [cast(ubyte)15,  2,  3,  4,  5,  6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+		ubyte* a = A.ptr;
+
+		ubyte[] B = [cast(ubyte) 1, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1, 17];
+		ubyte* b = B.ptr;
+
+		ubyte* c = (new ubyte[16]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pminsw_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pminsw_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,9 +16,13 @@
 	int main(){
 		haveSSE!()();
 
-		short* a = [cast(short)1, 2, 3, 4, 5, 6, 16, 2];
-		short* b = [cast(short)-9, 10, -11, -12, 13, 14, 0xFFF, 1];
-		short* c = new short[8];
+		short[] A = [cast(short)1, 2, 3, 4, 5, 6, 16, 2];
+		short* a = A.ptr;
+
+		short[] B = [cast(short)-9, 10, -11, -12, 13, 14, 0xFFF, 1];
+		short* b = B.ptr;
+
+		short* c = (new short[8]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pmovmskb_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmovmskb_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -17,21 +17,35 @@
 	int main(){
 		haveSSE2!()();
 
-		const ubyte Y = 0b1000_0000;
-		const ubyte N = 0b0111_1111;
+		ubyte Y = 0b1000_0000;
+		ubyte N = 0b0111_1111;
 
-		const ubyte[16] A = [Y, N, Y, Y, N, N, Y, Y, Y, N, N, N, Y, N, N, Y];
+		ubyte[] A = [Y, N, Y, Y, N, N, Y, Y, Y, N, N, N, Y, N, N, Y];
+		ubyte* a = A.ptr;
 
 		int b;
 
-		asm{
-			mov EAX, 0x1234_5678;
-			movdqu XMM0, A;
-			pmovmskb EAX, XMM0;
-			mov b, EAX;
+		static if(size_t.sizeof == 4){
+			asm{
+				mov EAX, 0x1234_5678; // poison EAX so the comparison on b catches upper bits that pmovmskb fails to clear
+				mov EDX, a; // address the data through EDX; using EAX here would overwrite the poison value
+				movdqu XMM0, [EDX];
+				pmovmskb EAX, XMM0;
+				mov b, EAX;
+			}
+		}else static if(size_t.sizeof == 8){
+			asm{
+				mov EAX, 0x1234_5678; // poison EAX so the comparison on b catches upper bits that pmovmskb fails to clear
+				mov RDX, a; // address the data through RDX; using RAX here would overwrite the poison value
+				movdqu XMM0, [RDX];
+				pmovmskb EAX, XMM0;
+				mov b, EAX;
+			}
+		}else{
+			static assert(0, "unhandled pointer size");
 		}
 
-		if(b != 0b1011_0011_1000_1001){
+		if(b != 0b1001_0001_1100_1101){
 			assert(0);
 		}
 
--- a/run/a/asm_pmovmskb_01_B.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmovmskb_01_B.d	Wed Dec 27 11:59:48 2006 +0000
@@ -4,8 +4,16 @@
 
 module dstress.run.a.asm_pmovmskb_01_B;
 
-int main(){
-	version(D_InlineAsm_X86){
+version(D_InlineAsm_X86){
+	version = runTest;
+}else version(D_InlineAsm_X86_64){
+	version = runTest;
+}
+
+version(runTest){
+	import addon.cpuinfo;
+
+	int main(){
 		ulong a = 0x0888_7FFF_FFFF_0000;
 		uint i;
 
@@ -21,8 +29,8 @@
 		}
 
 		return 0;
-	}else{
-		pragma(msg, "no Inline asm support");
-		static assert(0);
 	}
+}else{
+	pragma(msg, "DSTRESS{XFAIL}: no inline ASM support");
+	static assert(0);
 }
--- a/run/a/asm_pmulhrw_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmulhrw_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -10,7 +10,6 @@
 	version = runTest;
 }
 
-
 version(runTest){
 	import addon.cpuinfo;
 
--- a/run/a/asm_pmulhuw_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmulhuw_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,10 +16,13 @@
 	int main(){
 		haveSSE!()();
 
-		ushort* a = [cast(ushort)1, 2, 0x7FFF, 7, 0x7FF0, 0x7EDC, 3, 0x6BCD];
-		ushort* b = [cast(ushort)1, 0, 7, 0x7FFF, 0x00FF, 0x7EDC, 5, 13];
+		ushort[] A = [cast(ushort)1, 2, 0x7FFF, 7, 0x7FF0, 0x7EDC, 3, 0x6BCD];
+		ushort* a = A.ptr;
 
-		ushort* c = new ushort[8];
+		ushort[] B = [cast(ushort)1, 0, 7, 0x7FFF, 0x00FF, 0x7EDC, 5, 13];
+		ushort* b = B.ptr;
+
+		ushort* c = (new ushort[8]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pmulhw_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmulhw_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,10 +16,13 @@
 	int main(){
 		haveSSE2!()();
 
-		short* a = [cast(short)-1, 2, 0x7FFF, 7, 0x7FF0, 0x7EDC, 3, -16];
-		short* b = [cast(short)2, 0, 7, 0x7FFF, 0x00FF, 0x7EDC, 5, 0x6BCD];
+		short[] A = [cast(short)-1, 2, 0x7FFF, 7, 0x7FF0, 0x7EDC, 3, -16];
+		short* a = A.ptr;
 
-		ushort* c = new ushort[8];
+		short[] B = [cast(short)2, 0, 7, 0x7FFF, 0x00FF, 0x7EDC, 5, 0x6BCD];
+		short* b = B.ptr;
+
+		ushort* c = (new ushort[8]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pmullw_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmullw_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,10 +16,13 @@
 	int main(){
 		haveSSE2!()();
 
-		short* a = [cast(short)-1, 2, 0x7FFF, 7, 0x7FF0, 0x7EDC, 3, -16];
-		short* b = [cast(short)2, 0, 7, 0x7FFF, 0x00FF, 0x7EDC, 5, 0x7ABC];
+		short[] A = [cast(short)-1, 2, 0x7FFF, 7, 0x7FF0, 0x7EDC, 3, -16];
+		short* a = A.ptr;
 
-		ushort* c = new ushort[8];
+		short[] B = [cast(short)2, 0, 7, 0x7FFF, 0x00FF, 0x7EDC, 5, 0x7ABC];
+		short* b = B.ptr;
+
+		ushort* c = (new ushort[8]).ptr;
 
 		static if(size_t.sizeof == 4){
 			asm{
--- a/run/a/asm_pmuludq_01_A.d	Wed Dec 27 11:59:35 2006 +0000
+++ b/run/a/asm_pmuludq_01_A.d	Wed Dec 27 11:59:48 2006 +0000
@@ -16,22 +16,28 @@
 	int main(){
 		haveSSE2!()();
 
-		const uint[4] A = [1, 0x1234_5678, 0xFEDC_A987, 3];
-		const uint[4] B = [0xFFFF_FFFF, 0xABCD, 13, 88];
+		uint[] A = [1, 0x1234_5678, 0xFEDC_A987, 3];
+		uint* a = A.ptr;
 
-		ulong[2] c;
+		uint[] B = [0xFFFF_FFFF, 0xABCD, 13, 88];
+		uint* b = B.ptr;
+
+		ulong* c = (new ulong[2]).ptr;
 
 		asm{
-			movdqu XMM0, A;
-			movdqu XMM1, B;
+			mov EAX, a; // NOTE(review): pointer is moved through 32-bit EAX with no size_t.sizeof branch - confirm this test is only built for 32-bit x86
+			movdqu XMM0, [EAX]; // XMM0 = the four uints of A (unaligned 16-byte load)
+			mov EAX, b;
+			movdqu XMM1, [EAX]; // XMM1 = the four uints of B
 			pmuludq XMM0, XMM1;
-			movdqu c, XMM0;
+			mov EAX, c;
+			movdqu [EAX], XMM0; // c[0] = A[0] * B[0] and c[1] = A[2] * B[2], each kept as a full 64-bit product
 		}
 
-		if(c[0] != 0x0000_0C37__89AB_6618){
+		if(c[0] != 0xFFFF_FFFF){
 			assert(0);
 		}
-		if(c[1] != 0x0000_0000__0000_0108){
+		if(c[1] != 0xC_F134_9BDB){
 			assert(0);
 		}