Mercurial > projects > ldc
comparison druntime/src/compiler/dmd/util/cpuid.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1456:7b218ec1044f | 1458:e0b2d67cfe7c |
---|---|
1 /** | |
2 * Identify the characteristics of the host CPU, providing information | |
3 * about cache sizes and assembly optimisation hints. | |
4 * | |
5 * Some of this information was extremely difficult to track down. Some of the | |
6 * documents below were found only in cached versions stored by search engines! | |
7 * This code relies on information found in: | |
8 | |
9 * - "Intel(R) 64 and IA-32 Architectures Software Developers Manual, | |
10 * Volume 2A: Instruction Set Reference, A-M" (2007). | |
11 * - "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008). | |
12 * - "AMD Processor Recognition Application Note For Processors Prior to AMD | |
13 * Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005). | |
14 * - "AMD Geode(TM) GX Processors Data Book", | |
15 * Advanced Micro Devices, Publication ID 31505E, (2005). | |
16 * - "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000). | |
17 * - "Application note 106: Software Customization for the 6x86 Family", | |
18 * Cyrix Corporation, Rev 1.5 (1998) | |
19 * - http://ftp.intron.ac/pub/document/cpu/cpuid.htm | |
20 * - "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution", | |
21 * National Semiconductor, (2002) | |
22 * - "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008). | |
23 * - http://www.sandpile.org/ia32/cpuid.htm | |
24 * - http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html | |
25 * - "What every programmer should know about memory", | |
26 * Ulrich Drepper, Red Hat, Inc., (2007). | |
27 * | |
28 * Bugs: Currently only works on x86 and Itanium CPUs. | |
29 * Many processors have bugs in their microcode for the CPUID instruction, | |
30 * so sometimes the cache information may be incorrect. | |
31 * | |
32 * Copyright: Copyright Don Clugston 2007 - 2009. | |
33 * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. | |
34 * Authors: Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk> | |
35 * | |
36 * Copyright Don Clugston 2007 - 2009. | |
37 * Distributed under the Boost Software License, Version 1.0. | |
38 * (See accompanying file LICENSE_1_0.txt or copy at | |
39 * http://www.boost.org/LICENSE_1_0.txt) | |
40 */ | |
41 module rt.util.cpuid; | |
42 | |
43 // If optimizing for a particular processor, it is generally better | |
44 // to identify based on features rather than model. NOTE: Normally | |
45 // it's only worthwhile to optimise for the latest Intel and AMD CPU, | |
46 // with a backup for other CPUs. | |
47 // Pentium -- preferPentium1() | |
48 // PMMX -- + mmx() | |
49 // PPro -- default | |
50 // PII -- + mmx() | |
51 // PIII -- + mmx() + sse() | |
52 // PentiumM -- + mmx() + sse() + sse2() | |
53 // Pentium4 -- preferPentium4() | |
54 // PentiumD -- + isX86_64() | |
55 // Core2 -- default + isX86_64() | |
56 // AMD K5 -- preferPentium1() | |
57 // AMD K6 -- + mmx() | |
58 // AMD K6-II -- + mmx() + 3dnow() | |
59 // AMD K7 -- preferAthlon() | |
60 // AMD K8 -- + sse2() | |
61 // AMD K10 -- + isX86_64() | |
62 // Cyrix 6x86 -- preferPentium1() | |
63 // 6x86MX -- + mmx() | |
64 | |
65 public: | |
66 | |
67 /// Describes one cache level: its size, associativity and line size. | |
68 struct CacheInfo | |
69 { | |
70 /// Size of the cache, in kilobytes, per CPU. | |
71 /// For L1 unified (data + code) caches, this size is half the physical size. | |
72 /// (Larger caches are not halved, since normally | |
73 /// data size is much greater than code size for critical loops.) | |
74 uint size; | |
75 /// Number of ways of associativity, e.g.: | |
76 /// 1 = direct mapped | |
77 /// 2 = 2-way set associative | |
78 /// 3 = 3-way set associative | |
79 /// ubyte.max = fully associative | |
80 ubyte associativity; | |
81 /// Number of bytes read into the cache when a cache miss occurs. | |
82 uint lineSize; | |
83 } | |
84 | |
85 public: | |
86 /// Returns vendor string, for display purposes only. | |
87 /// Do NOT use this to determine features! | |
88 /// Note that some CPUs have programmable vendorIDs. | |
89 char[] vendor() {return vendorID;} | |
90 /// Returns processor string, for display purposes only. | |
91 char[] processor() {return processorName;} | |
92 | |
93 /// The data caches. If there are fewer than 5 physical cache levels, the size of | |
94 /// each remaining level is set to uint.max/1024 KB (== entire memory space). | |
95 __gshared CacheInfo[5] datacache; | |
96 /// Does it have an x87 FPU on-chip? | |
97 bool x87onChip() {return (features&FPU_BIT)!=0;} | |
98 /// Is MMX supported? | |
99 bool mmx() {return (features&MMX_BIT)!=0;} | |
100 /// Is SSE supported? | |
101 bool sse() {return (features&SSE_BIT)!=0;} | |
102 /// Is SSE2 supported? | |
103 bool sse2() {return (features&SSE2_BIT)!=0;} | |
104 /// Is SSE3 supported? | |
105 bool sse3() {return (miscfeatures&SSE3_BIT)!=0;} | |
106 /// Is SSSE3 supported? | |
107 bool ssse3() {return (miscfeatures&SSSE3_BIT)!=0;} | |
108 /// Is SSE4.1 supported? | |
109 bool sse41() {return (miscfeatures&SSE41_BIT)!=0;} | |
110 /// Is SSE4.2 supported? | |
111 bool sse42() {return (miscfeatures&SSE42_BIT)!=0;} | |
112 /// Is SSE4a supported? | |
113 bool sse4a() {return (amdmiscfeatures&SSE4A_BIT)!=0;} | |
114 /// Is SSE5 supported? | |
115 bool sse5() {return (amdmiscfeatures&SSE5_BIT)!=0;} | |
116 /// Is AMD 3DNOW supported? | |
117 bool amd3dnow() {return (amdfeatures&AMD_3DNOW_BIT)!=0;} | |
118 /// Is AMD 3DNOW Ext supported? | |
119 bool amd3dnowExt() {return (amdfeatures&AMD_3DNOW_EXT_BIT)!=0;} | |
120 /// Are AMD extensions to MMX supported? | |
121 bool amdMmx() {return (amdfeatures&AMD_MMX_BIT)!=0;} | |
122 /// Is fxsave/fxrstor supported? | |
123 bool hasFxsr() {return (features&FXSR_BIT)!=0;} | |
124 /// Is cmov supported? | |
125 bool hasCmov() {return (features&CMOV_BIT)!=0;} | |
126 /// Is rdtsc supported? | |
127 bool hasRdtsc() {return (features&TIMESTAMP_BIT)!=0;} | |
128 /// Is cmpxchg8b supported? | |
129 bool hasCmpxchg8b() {return (features&CMPXCHG8B_BIT)!=0;} | |
130 /// Is cmpxchg16b supported? | |
131 bool hasCmpxchg16b() {return (miscfeatures&CMPXCHG16B_BIT)!=0;} | |
132 /// Is 3DNow prefetch supported? | |
133 bool has3dnowPrefetch() | |
134 {return (amdmiscfeatures&AMD_3DNOW_PREFETCH_BIT)!=0;} | |
135 /// Are LAHF and SAHF supported in 64-bit mode? | |
136 bool hasLahfSahf() {return (amdmiscfeatures&LAHFSAHF_BIT)!=0;} | |
137 /// Is POPCNT supported? | |
138 bool hasPopcnt() {return (miscfeatures&POPCNT_BIT)!=0;} | |
139 /// Is LZCNT supported? | |
140 bool hasLzcnt() {return (amdmiscfeatures&LZCNT_BIT)!=0;} | |
141 /// Is this an Intel64 or AMD 64? | |
142 bool isX86_64() {return (amdfeatures&AMD64_BIT)!=0;} | |
143 | |
144 /// Is this an IA64 (Itanium) processor? | |
145 bool isItanium() { return (features&IA64_BIT)!=0; } | |
146 | |
147 /// Is hyperthreading supported? True when there are more threads than cores. | |
148 bool hyperThreading() { return maxThreads>maxCores; } | |
149 /// Returns number of threads per CPU | |
150 uint threadsPerCPU() {return maxThreads;} | |
151 /// Returns number of cores in CPU | |
152 uint coresPerCPU() {return maxCores;} | |
153 | |
154 /// Optimisation hints for assembly code. | |
155 /// For forward compatibility, the CPU is compared against different | |
156 /// microarchitectures. For 32-bit X86, comparisons are made against | |
157 /// the Intel PPro/PII/PIII/PM family. | |
158 /// | |
159 /// The major 32-bit x86 microarchitecture 'dynasties' have been: | |
160 /// (1) Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). | |
161 /// (2) AMD Athlon (K7, K8, K10). | |
162 /// (3) Intel NetBurst (Pentium 4, Pentium D). | |
163 /// (4) In-order Pentium (Pentium1, PMMX) | |
164 /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta, | |
165 /// Cyrix, Rise) were mostly in-order. | |
166 /// Some new processors do not fit into the existing categories: | |
167 /// Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. | |
168 /// Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. | |
169 /// | |
170 /// Within each dynasty, the optimisation techniques are largely | |
171 /// identical (eg, use instruction pairing for group 4). Major | |
172 /// instruction set improvements occur within each group. | |
173 | |
174 /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code? | |
175 bool preferAthlon() { return probablyAMD && family >=6; } | |
176 /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code? | |
177 bool preferPentium4() { return probablyIntel && family == 0xF; } | |
178 /// Does this CPU perform better on Pentium I code than Pentium Pro code? | |
179 bool preferPentium1() { return family < 6 || (family==6 && model < 0xF && !probablyIntel); } | |
180 | |
181 __gshared: | |
182 public: | |
183 /// Processor type (vendor-dependent). | |
184 /// This should be visible ONLY for display purposes. | |
185 uint stepping, model, family; | |
186 uint numCacheLevels = 1; // recomputed by the module constructor from datacache | |
187 private: | |
188 bool probablyIntel; // true = _probably_ an Intel processor, might be faking | |
189 bool probablyAMD; // true = _probably_ an AMD processor | |
190 char [12] vendorID; | |
191 char [] processorName; // empty until cpuidX86 points it into processorNameBuffer | |
192 char [48] processorNameBuffer; | |
193 uint features = 0; // mmx, sse, sse2, hyperthreading, etc | |
194 uint miscfeatures = 0; // sse3, etc. | |
195 uint amdfeatures = 0; // 3DNow!, mmxext, etc | |
196 uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc | |
197 uint maxCores = 1; | |
198 uint maxThreads = 1; | |
199 // Raw HTT flag. Note that this may indicate multi-core rather than hyperthreading. | |
200 bool hyperThreadingBit() { return (features&HTT_BIT)!=0;} | |
201 | |
202 // Bit masks for `features` (feature flags CPUID1_EDX) | |
203 enum : uint | |
204 { | |
205 FPU_BIT = 1, // x87 FPU on chip | |
206 TIMESTAMP_BIT = 1<<4, // rdtsc | |
207 MDSR_BIT = 1<<5, // RDMSR/WRMSR | |
208 CMPXCHG8B_BIT = 1<<8, | |
209 CMOV_BIT = 1<<15, | |
210 MMX_BIT = 1<<23, | |
211 FXSR_BIT = 1<<24, // fxsave/fxrstor | |
212 SSE_BIT = 1<<25, | |
213 SSE2_BIT = 1<<26, | |
214 HTT_BIT = 1<<28, // hyperthreading, or possibly multi-core | |
215 IA64_BIT = 1<<30 // Itanium | |
216 } | |
217 // Bit masks for `miscfeatures` (feature flags misc CPUID1_ECX) | |
218 enum : uint | |
219 { | |
220 SSE3_BIT = 1, | |
221 PCLMULQDQ_BIT = 1<<1, // from AVX | |
222 MWAIT_BIT = 1<<3, | |
223 SSSE3_BIT = 1<<9, | |
224 FMA_BIT = 1<<12, // from AVX | |
225 CMPXCHG16B_BIT = 1<<13, | |
226 SSE41_BIT = 1<<19, | |
227 SSE42_BIT = 1<<20, | |
228 POPCNT_BIT = 1<<23, | |
229 AES_BIT = 1<<25, // AES instructions from AVX | |
230 OSXSAVE_BIT = 1<<27, // Used for AVX | |
231 AVX_BIT = 1<<28 | |
232 } | |
233 /+ | |
234 version(X86_64) { | |
235 bool hasAVXinHardware() { | |
236 // This only indicates hardware support, not OS support. | |
237 return (miscfeatures&AVX_BIT) && (miscfeatures&OSXSAVE_BIT); | |
238 } | |
239 // Is AVX supported (in both hardware & OS)? | |
240 bool Avx() { | |
241 if (!hasAVXinHardware()) return false; | |
242 // Check for OS support | |
243 uint xfeatures; | |
244 asm {mov ECX, 0; xgetbv; mov xfeatures, EAX; } | |
245 return (xfeatures&0x6)==6; | |
246 } | |
247 bool hasAvxFma() { | |
248 if (!AVX()) return false; | |
249 return (features&FMA_BIT)!=0; | |
250 } | |
251 } | |
252 +/ | |
253 // Bit masks for `amdfeatures` (AMD feature flags CPUID80000001_EDX) | |
254 enum : uint | |
255 { | |
256 AMD_MMX_BIT = 1<<22, // AMD extensions to MMX | |
257 // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions. | |
258 FFXSR_BIT = 1<<25, | |
259 PAGE1GB_BIT = 1<<26, // support for 1GB pages | |
260 RDTSCP_BIT = 1<<27, | |
261 AMD64_BIT = 1<<29, // Intel64 or AMD 64 | |
262 AMD_3DNOW_EXT_BIT = 1<<30, | |
263 AMD_3DNOW_BIT = 1<<31 | |
264 } | |
265 // Bit masks for `amdmiscfeatures` (AMD misc feature flags CPUID80000001_ECX) | |
266 enum : uint | |
267 { | |
268 LAHFSAHF_BIT = 1, // LAHF and SAHF in 64-bit mode | |
269 LZCNT_BIT = 1<<5, | |
270 SSE4A_BIT = 1<<6, | |
271 AMD_3DNOW_PREFETCH_BIT = 1<<8, | |
272 SSE5_BIT = 1<<11 | |
273 } | |
274 | |
275 version(GNU){ | |
276 // GDC can't actually do inline asm, so Really_D_InlineAsm_X86 is never set for it. | |
277 } else version(D_InlineAsm_X86) { | |
278 version = Really_D_InlineAsm_X86; | |
279 } | |
280 | |
281 version(Really_D_InlineAsm_X86) { | |
282 // Note that this code will also work for Itanium in x86 mode. | |
283 | |
284 shared uint max_cpuid, max_extended_cpuid; | |
285 | |
286 // CPUID2: "cache and tlb information". Decodes the leaf 2 descriptor bytes into datacache. | |
287 void getcacheinfoCPUID2() | |
288 { | |
289 // We are only interested in the data caches | |
290 void decipherCpuid2(ubyte x) { | |
291 if (x==0) return; | |
292 // Values from http://www.sandpile.org/ia32/cpuid.htm. | |
293 // Includes Itanium and non-Intel CPUs. | |
294 // ids, sizes (in KB) and ways are parallel tables; the first row of each is level 1. | |
295 immutable ubyte [47] ids = [ | |
296 0x0A, 0x0C, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68, | |
297 // level 2 cache | |
298 0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F, | |
299 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E, | |
300 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81, | |
301 // level 3 cache | |
302 0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D | |
303 ]; | |
304 immutable uint [47] sizes = [ | |
305 8, 16, 32, 16, 24, 8, 16, 32, | |
306 128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512, | |
307 256, 512, 1024, 2048, 512, 1024, 4096, 6*1024, | |
308 128, 192, 128, 256, 384, 512, 3072, 512, 128, | |
309 512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024 | |
310 ]; | |
311 // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative | |
312 immutable ubyte [47] ways = [ | |
313 2, 4, 8, 8, 6, 4, 4, 4, | |
314 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2, | |
315 8, 8, 8, 8, 4, 8, 16, 24, | |
316 4, 6, 2, 4, 6, 4, 12, 8, 8, | |
317 4, 8, 8, 8, 4, 8, 12, 16, 12, 16 | |
318 ]; | |
319 enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 } // table index of the first L2 / first L3 entry | |
320 for (int i=0; i< ids.length; ++i) { | |
321 if (x==ids[i]) { | |
322 int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2; | |
323 if (x==0x49 && family==0xF && model==0x6) level=2; // treated as L3 on this model | |
324 datacache[level].size=sizes[i]; | |
325 datacache[level].associativity=ways[i]; | |
326 if (level == 2 || x==0x2C || (x>=0x48 && x<=0x80) | |
327 || x==0x86 || x==0x87 // level is 0-based, so 2 is the L3 cache (64-byte lines) | |
328 || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){ | |
329 datacache[level].lineSize = 64; | |
330 } else datacache[level].lineSize = 32; | |
331 } | |
332 } | |
333 } | |
334 | |
335 uint[4] a; | |
336 bool firstTime = true; | |
337 // On a multi-core system, this could theoretically fail, but it's only used | |
338 // for old single-core CPUs. | |
339 uint numinfos = 1; | |
340 do { | |
341 asm { | |
342 mov EAX, 2; | |
343 cpuid; | |
344 mov a, EAX; | |
345 mov a+4, EBX; | |
346 mov a+8, ECX; | |
347 mov a+12, EDX; | |
348 } | |
349 if (firstTime) { | |
350 if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) { | |
351 // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080. | |
352 // These are NOT standard Intel values | |
353 // (TLB = 32 entry, 4 way associative, 4K pages) | |
354 // (L1 cache = 16K, 4way, linesize16) | |
355 datacache[0].size=8; | |
356 datacache[0].associativity=4; | |
357 datacache[0].lineSize=16; | |
358 return; | |
359 } | |
360 // lsb of a is how many times to loop. | |
361 numinfos = a[0] & 0xFF; | |
362 // and otherwise it should be ignored | |
363 a[0] &= 0xFFFF_FF00; | |
364 firstTime = false; | |
365 } | |
366 for (int c=0; c<4;++c) { | |
367 // high bit set == no info. | |
368 if (a[c] & 0x8000_0000) continue; | |
369 decipherCpuid2(cast(ubyte)(a[c] & 0xFF)); | |
370 decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF)); | |
371 decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF)); | |
372 decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF)); | |
373 } | |
374 } while (--numinfos); | |
375 } | |
376 | |
377 // CPUID4: "Deterministic cache parameters" leaf. Fills datacache and raises maxCores. | |
378 void getcacheinfoCPUID4() | |
379 { | |
380 int cachenum = 0; | |
381 for(;;) { | |
382 uint a, b, number_of_sets; | |
383 asm { | |
384 mov EAX, 4; | |
385 mov ECX, cachenum; | |
386 cpuid; | |
387 mov a, EAX; | |
388 mov b, EBX; | |
389 mov number_of_sets, ECX; | |
390 } | |
391 ++cachenum; | |
392 if ((a&0x1F)==0) break; // no more caches | |
393 uint numthreads = ((a>>14) & 0xFFF) + 1; | |
394 uint numcores = ((a>>26) & 0x3F) + 1; | |
395 if (numcores > maxCores) maxCores = numcores; | |
396 if ((a&0x1F)!=1 && ((a&0x1F)!=3)) continue; // we only want data & unified caches | |
397 | |
398 ++number_of_sets; | |
399 ubyte level = cast(ubyte)(((a>>5)&7)-1); | |
400 if (level >= datacache.length) continue; // ignore deep caches; a level field of 0 wraps to 255 | |
401 datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1); | |
402 datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size | |
403 uint line_partitions = ((b >> 12)& 0x3FF) + 1; | |
404 // Size = number of sets * associativity * cachelinesize * linepartitions | |
405 // and must convert to Kb, also dividing by the number of hyperthreads using this cache. | |
406 ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets * | |
407 datacache[level].associativity : number_of_sets; | |
408 datacache[level].size = cast(uint)( | |
409 (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024)); | |
410 if (level == 0 && (a&0x1F)==3) { | |
411 // Halve the size for unified L1 caches | |
412 datacache[level].size/=2; | |
413 } | |
414 } | |
415 } | |
416 | |
417 // CPUID8000_0005 & 6: fills datacache[0] and, when leaf 0x8000_0006 exists, datacache[1] and [2]. | |
418 void getAMDcacheinfo() | |
419 { | |
420 uint c5, c6, d6; | |
421 asm { | |
422 mov EAX, 0x8000_0005; // L1 cache | |
423 cpuid; | |
424 // EAX has L1_TLB_4M. | |
425 // EBX has L1_TLB_4K | |
426 // EDX has L1 instruction cache | |
427 mov c5, ECX; | |
428 } | |
429 // c5 layout as decoded here: size in bits 31..24, associativity in 23..16, line size in 7..0. | |
430 datacache[0].size = ( (c5>>24) & 0xFF); | |
431 datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF); | |
432 datacache[0].lineSize = c5 & 0xFF; | |
433 | |
434 if (max_extended_cpuid >= 0x8000_0006) { | |
435 // AMD K6-III or K6-2+ or later. | |
436 ubyte numcores = 1; | |
437 if (max_extended_cpuid >=0x8000_0008) { | |
438 asm { | |
439 mov EAX, 0x8000_0008; | |
440 cpuid; | |
441 mov numcores, CL; | |
442 } | |
443 ++numcores; // presumably CL holds the core count minus one | |
444 if (numcores>maxCores) maxCores = numcores; | |
445 } | |
446 asm { | |
447 mov EAX, 0x8000_0006; // L2/L3 cache | |
448 cpuid; | |
449 mov c6, ECX; // L2 cache info | |
450 mov d6, EDX; // L3 cache info | |
451 } | |
452 // Maps the 4-bit associativity code in bits 15..12 to a number of ways (0xFF == ubyte.max). | |
453 immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ]; | |
454 datacache[1].size = (c6>>16) & 0xFFFF; | |
455 datacache[1].associativity = assocmap[(c6>>12)&0xF]; | |
456 datacache[1].lineSize = c6 & 0xFF; | |
457 | |
458 // The L3 cache value is TOTAL, not per core. | |
459 datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1. | |
460 datacache[2].associativity = assocmap[(d6>>12)&0xF]; | |
461 datacache[2].lineSize = d6 & 0xFF; | |
462 } | |
463 } | |
464 | |
465 // Runs CPUID and fills in the vendor/processor strings, feature flags, family/model/stepping, core/thread counts and datacache. | |
466 void cpuidX86() | |
467 { | |
468 char * venptr = vendorID.ptr; | |
469 asm { | |
470 mov EAX, 0; | |
471 cpuid; | |
472 mov max_cpuid, EAX; | |
473 mov EAX, venptr; | |
474 mov [EAX], EBX; | |
475 mov [EAX + 4], EDX; | |
476 mov [EAX + 8], ECX; | |
477 mov EAX, 0x8000_0000; | |
478 cpuid; | |
479 mov max_extended_cpuid, EAX; | |
480 } | |
481 | |
482 probablyIntel = vendorID == "GenuineIntel"; | |
483 probablyAMD = vendorID == "AuthenticAMD"; | |
484 uint a, c; | |
485 uint apic = 0; // brand index, apic id | |
486 asm { | |
487 mov EAX, 1; // model, stepping | |
488 cpuid; | |
489 mov a, EAX; | |
490 mov apic, EBX; | |
491 mov miscfeatures, ECX; | |
492 mov features, EDX; | |
493 } | |
494 amdfeatures = 0; | |
495 amdmiscfeatures = 0; | |
496 if (max_extended_cpuid >= 0x8000_0001) { | |
497 asm { | |
498 mov EAX, 0x8000_0001; | |
499 cpuid; | |
500 mov amdmiscfeatures, ECX; | |
501 mov amdfeatures, EDX; | |
502 } | |
503 } | |
504 // Try to detect fraudulent vendorIDs | |
505 if (amd3dnow) probablyIntel = false; | |
506 // The extended family is masked before it is added: '+' binds tighter than '&'. | |
507 stepping = a & 0xF; | |
508 uint fbase = (a >> 8) & 0xF; | |
509 uint mbase = (a >> 4) & 0xF; | |
510 family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase; | |
511 model = ((fbase == 0xF) || (fbase == 6 && probablyIntel) ) ? | |
512 mbase + ((a >> 12) & 0xF0) : mbase; | |
513 | |
514 if (!probablyIntel && max_extended_cpuid >= 0x8000_0008) { | |
515 // determine max number of cores for AMD | |
516 asm { | |
517 mov EAX, 0x8000_0008; | |
518 cpuid; | |
519 mov c, ECX; | |
520 } | |
521 uint apicsize = (c>>12) & 0xF; | |
522 if (apicsize == 0) { | |
523 // use legacy method: the low byte is the core count minus one (as in getAMDcacheinfo) | |
524 if (hyperThreadingBit) maxCores = (c & 0xFF) + 1; | |
525 else maxCores = 1; | |
526 } else { | |
527 // maxcores = 2^ apicsize | |
528 maxCores = 1; | |
529 while (apicsize) { maxCores<<=1; --apicsize; } | |
530 } | |
531 } | |
532 | |
533 if (max_extended_cpuid >= 0x8000_0004) { | |
534 char *procptr = processorNameBuffer.ptr; | |
535 asm { | |
536 push ESI; | |
537 mov ESI, procptr; | |
538 mov EAX, 0x8000_0002; | |
539 cpuid; | |
540 mov [ESI], EAX; | |
541 mov [ESI+4], EBX; | |
542 mov [ESI+8], ECX; | |
543 mov [ESI+12], EDX; | |
544 mov EAX, 0x8000_0003; | |
545 cpuid; | |
546 mov [ESI+16], EAX; | |
547 mov [ESI+20], EBX; | |
548 mov [ESI+24], ECX; | |
549 mov [ESI+28], EDX; | |
550 mov EAX, 0x8000_0004; | |
551 cpuid; | |
552 mov [ESI+32], EAX; | |
553 mov [ESI+36], EBX; | |
554 mov [ESI+40], ECX; | |
555 mov [ESI+44], EDX; | |
556 pop ESI; | |
557 } | |
558 // Intel P4 and PM pad at front with spaces. Other CPUs pad at end with nulls. | |
559 // Both scans are bounded so an all-space or all-null buffer cannot run past the array. | |
560 size_t start = 0, end = 0; | |
561 while (start < processorNameBuffer.length && processorNameBuffer[start] == ' ') { ++start; } | |
562 while (end < processorNameBuffer.length - start && processorNameBuffer[$-end-1] == 0) { ++end; } | |
563 processorName = processorNameBuffer[start..$-end]; | |
564 } else { | |
565 processorName = processorNameBuffer[0..11]; // processorName is empty until given storage | |
566 processorName[] = "Unknown CPU"; | |
567 } | |
568 // Determine cache sizes | |
569 // Intel docs specify that they return 0 for 0x8000_0005. | |
570 // AMD docs do not specify the behaviour for 0004 and 0002. | |
571 // Centaur/VIA and most other manufacturers use the AMD method, | |
572 // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2! | |
573 // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour | |
574 // for CPUID80000005. But Geode GX uses the AMD method | |
575 | |
576 // Deal with Geode GX1 - make it same as MediaGX MMX. | |
577 if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) { | |
578 max_extended_cpuid = 0x8000_0004; | |
579 } | |
580 // Therefore, we try the AMD method unless it's an Intel chip. | |
581 // If we still have no info, try the Intel methods. | |
582 datacache[0].size = 0; | |
583 if (max_cpuid<2 || !probablyIntel) { | |
584 if (max_extended_cpuid >= 0x8000_0005) { | |
585 getAMDcacheinfo(); | |
586 } else if (probablyAMD) { | |
587 // According to AMDProcRecognitionAppNote, this means CPU | |
588 // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4) | |
589 // Am5x86 has 16Kb 4-way unified data & code cache. | |
590 datacache[0].size = 8; | |
591 datacache[0].associativity = 4; | |
592 datacache[0].lineSize = 32; | |
593 } else { | |
594 // Some obscure CPU. | |
595 // Values for Cyrix 6x86MX (family 6, model 0) | |
596 datacache[0].size = 64; | |
597 datacache[0].associativity = 4; | |
598 datacache[0].lineSize = 32; | |
599 } | |
600 } | |
601 if ((datacache[0].size == 0) && max_cpuid>=4) { | |
602 getcacheinfoCPUID4(); | |
603 } | |
604 if ((datacache[0].size == 0) && max_cpuid>=2) { | |
605 getcacheinfoCPUID2(); | |
606 } | |
607 if (datacache[0].size == 0) { | |
608 // Pentium, PMMX, late model 486, or an obscure CPU | |
609 if (mmx) { // Pentium MMX. Also has 8kB code cache. | |
610 datacache[0].size = 16; | |
611 datacache[0].associativity = 4; | |
612 datacache[0].lineSize = 32; | |
613 } else { // Pentium 1 (which also has 8kB code cache) | |
614 // or 486. | |
615 // Cyrix 6x86: 16, 4way, 32 linesize | |
616 datacache[0].size = 8; | |
617 datacache[0].associativity = 2; | |
618 datacache[0].lineSize = 32; | |
619 } | |
620 } | |
621 if (hyperThreadingBit) maxThreads = (apic>>>16) & 0xFF; | |
622 else maxThreads = maxCores; | |
623 } | |
624 | |
625 // Return true if the cpuid instruction is supported, by testing whether bit 21 of the flags register can be toggled. | |
626 // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines. | |
627 bool hasCPUID() | |
628 { | |
629 uint flags; | |
630 asm { | |
631 pushfd; | |
632 pop EAX; | |
633 mov flags, EAX; // remember the original flags | |
634 xor EAX, 0x0020_0000; // flip bit 21 | |
635 push EAX; | |
636 popfd; // try to write the modified flags back | |
637 pushfd; | |
638 pop EAX; // read back what was actually stored | |
639 xor flags, EAX; // flags now holds only the bits that changed | |
640 } | |
641 return (flags & 0x0020_0000) !=0; | |
642 } | |
643 | |
644 } else { // inline asm X86 | |
645 // Without usable inline asm, CPUID is reported as unavailable. | |
646 bool hasCPUID() { return false; } | |
647 // Fallback without inline asm: fixed L1 values, the same ones this file guesses for a Pentium 1. | |
648 void cpuidX86() | |
649 { | |
650 datacache[0].size = 8; | |
651 datacache[0].associativity = 2; | |
652 datacache[0].lineSize = 32; | |
653 } | |
654 } | |
655 | |
656 // Fills datacache with fixed values for an assumed PowerPC model. TODO: Implement this function with OS support | |
657 void cpuidPPC() | |
658 { | |
659 // Known models, in the same order as the per-model tables below. | |
660 enum : int { PPC601, PPC603, PPC603E, PPC604, PPC604E, PPC620, PPCG3, PPCG4, PPCG5 } | |
661 // TODO: asm { mfpvr; } returns the CPU version but unfortunately it can | |
662 // only be used in kernel mode. So OS support is required. | |
663 int cpu = PPC603; | |
664 | |
665 // Per-model cache sizes and L1 ways. 601 has a 8KB combined data & code L1 cache. | |
666 uint[] l1Size = [4, 8, 16, 16, 32, 32, 32, 32, 64]; | |
667 ubyte[] l1Ways = [8, 2, 4, 4, 4, 8, 8, 8, 8]; | |
668 uint[] l2Size = [0, 0, 0, 0, 0, 0, 0, 256, 512]; | |
669 uint[] l3Size = [0, 0, 0, 0, 0, 0, 0, 2048, 0]; | |
670 | |
671 uint line = 32; | |
672 if (cpu == PPCG5) line = 128; | |
673 else if (cpu == PPC620 || cpu == PPCG3) line = 64; | |
674 | |
675 datacache[0].size = l1Size[cpu]; | |
676 datacache[0].associativity = l1Ways[cpu]; | |
677 datacache[1].size = l2Size[cpu]; | |
678 datacache[2].size = l3Size[cpu]; | |
679 // All three levels are given the same line size. | |
680 datacache[0].lineSize = datacache[1].lineSize = datacache[2].lineSize = line; | |
681 } | |
682 | |
683 // Currently an empty placeholder; the comments list known cache sizes. TODO: Implement this function with OS support | |
684 void cpuidSparc() | |
685 { | |
686 // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way. | |
687 // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192. | |
688 // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way | |
689 // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024. | |
690 // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024. | |
691 // Sparc64V : L1 = 128, 2way. L2 = 4096 4way. | |
692 } | |
693 | |
694 // Module constructor: runs CPU detection, then fills in datacache defaults and numCacheLevels. | |
695 static this() | |
696 { | |
697 // Without CPUID it's a 386 or 486, or a Cyrix 6x86 (probably still with an | |
698 // external cache); nothing is detected and the guesses below are used. | |
699 if (hasCPUID()) | |
700 cpuidX86(); | |
701 | |
702 if (datacache[0].size == 0) { | |
703 // No L1 information: guess same as Pentium 1. | |
704 datacache[0].size = 8; | |
705 datacache[0].associativity = 2; | |
706 datacache[0].lineSize = 32; | |
707 } | |
708 numCacheLevels = 1; | |
709 for (uint lvl = 1; lvl < datacache.length; ++lvl) { | |
710 if (datacache[lvl].size != 0) { | |
711 numCacheLevels = lvl + 1; // highest level that reported a size | |
712 continue; | |
713 } | |
714 // Unused level: set it equal to the full address space. | |
715 datacache[lvl].size = uint.max/1024; | |
716 datacache[lvl].associativity = 1; | |
717 datacache[lvl].lineSize = datacache[lvl-1].lineSize; | |
718 } | |
719 } | |