comparison druntime/src/compiler/dmd/util/cpuid.d @ 1458:e0b2d67cfe7c

Added druntime (this should be removed once it works).
author Robert Clipsham <robert@octarineparrot.com>
date Tue, 02 Jun 2009 17:43:06 +0100
parents
children
comparison
equal deleted inserted replaced
1456:7b218ec1044f 1458:e0b2d67cfe7c
1 /**
2 * Identify the characteristics of the host CPU, providing information
3 * about cache sizes and assembly optimisation hints.
4 *
5 * Some of this information was extremely difficult to track down. Some of the
6 * documents below were found only in cached versions stored by search engines!
7 * This code relies on information found in:
8
9 * - "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
10 * Volume 2A: Instruction Set Reference, A-M" (2007).
11 * - "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
12 * - "AMD Processor Recognition Application Note For Processors Prior to AMD
13 * Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
14 * - "AMD Geode(TM) GX Processors Data Book",
15 * Advanced Micro Devices, Publication ID 31505E, (2005).
16 * - "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
17 * - "Application note 106: Software Customization for the 6x86 Family",
18 * Cyrix Corporation, Rev 1.5 (1998)
19 * - http://ftp.intron.ac/pub/document/cpu/cpuid.htm
20 * - "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
21 * National Semiconductor, (2002)
22 * - "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
23 * - http://www.sandpile.org/ia32/cpuid.htm
24 * - http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html
25 * - "What every programmer should know about memory",
26 * Ulrich Drepper, Red Hat, Inc., (2007).
27 *
28 * Bugs: Currently only works on x86 and Itanium CPUs.
29 * Many processors have bugs in their microcode for the CPUID instruction,
30 * so sometimes the cache information may be incorrect.
31 *
32 * Copyright: Copyright Don Clugston 2007 - 2009.
33 * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
34 * Authors: Don Clugston, Tomas Lindquist Olsen &lt;tomas@famolsen.dk&gt;
35 *
36 * Copyright Don Clugston 2007 - 2009.
37 * Distributed under the Boost Software License, Version 1.0.
38 * (See accompanying file LICENSE_1_0.txt or copy at
39 * http://www.boost.org/LICENSE_1_0.txt)
40 */
41 module rt.util.cpuid;
42
43 // If optimizing for a particular processor, it is generally better
44 // to identify based on features rather than model. NOTE: Normally
45 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
46 // with a backup for other CPUs.
47 // Pentium -- preferPentium1()
48 // PMMX -- + mmx()
49 // PPro -- default
50 // PII -- + mmx()
51 // PIII -- + mmx() + sse()
52 // PentiumM -- + mmx() + sse() + sse2()
53 // Pentium4 -- preferPentium4()
54 // PentiumD -- + isX86_64()
55 // Core2 -- default + isX86_64()
56 // AMD K5 -- preferPentium1()
57 // AMD K6 -- + mmx()
58 // AMD K6-II -- + mmx() + 3dnow()
59 // AMD K7 -- preferAthlon()
60 // AMD K8 -- + sse2()
61 // AMD K10 -- + isX86_64()
62 // Cyrix 6x86 -- preferPentium1()
63 // 6x86MX -- + mmx()
64
65 public:
66
67 /// Cache size and behaviour
68 struct CacheInfo
69 {
70 /// Size of the cache, in kilobytes, per CPU.
71 /// For L1 unified (data + code) caches, this size is half the physical size.
72 /// (we don't halve it for larger sizes, since normally
73 /// data size is much greater than code size for critical loops).
74 uint size;
75 /// Number of ways of associativity, eg:
76 /// 1 = direct mapped
77 /// 2 = 2-way set associative
78 /// 3 = 3-way set associative
79 /// ubyte.max = fully associative
80 ubyte associativity;
81 /// Number of bytes read into the cache when a cache miss occurs.
82 uint lineSize;
83 }
84
85 public:
86 /// Returns vendor string, for display purposes only.
87 /// Do NOT use this to determine features!
88 /// Note that some CPUs have programmable vendorIDs.
89 char[] vendor() {return vendorID;}
90 /// Returns processor string, for display purposes only
91 char[] processor() {return processorName;}
92
93 /// The data caches. If there are fewer than 5 physical cache levels,
94 /// the remaining levels are sized to cover the entire memory space (size == uint.max/1024 kilobytes).
95 __gshared CacheInfo[5] datacache;
96 /// Does it have an x87 FPU on-chip?
97 bool x87onChip() {return (features&FPU_BIT)!=0;}
98 /// Is MMX supported?
99 bool mmx() {return (features&MMX_BIT)!=0;}
100 /// Is SSE supported?
101 bool sse() {return (features&SSE_BIT)!=0;}
102 /// Is SSE2 supported?
103 bool sse2() {return (features&SSE2_BIT)!=0;}
104 /// Is SSE3 supported?
105 bool sse3() {return (miscfeatures&SSE3_BIT)!=0;}
106 /// Is SSSE3 supported?
107 bool ssse3() {return (miscfeatures&SSSE3_BIT)!=0;}
108 /// Is SSE4.1 supported?
109 bool sse41() {return (miscfeatures&SSE41_BIT)!=0;}
110 /// Is SSE4.2 supported?
111 bool sse42() {return (miscfeatures&SSE42_BIT)!=0;}
112 /// Is SSE4a supported?
113 bool sse4a() {return (amdmiscfeatures&SSE4A_BIT)!=0;}
114 /// Is SSE5 supported?
115 bool sse5() {return (amdmiscfeatures&SSE5_BIT)!=0;}
116 /// Is AMD 3DNOW supported?
117 bool amd3dnow() {return (amdfeatures&AMD_3DNOW_BIT)!=0;}
118 /// Is AMD 3DNOW Ext supported?
119 bool amd3dnowExt() {return (amdfeatures&AMD_3DNOW_EXT_BIT)!=0;}
120 /// Are AMD extensions to MMX supported?
121 bool amdMmx() {return (amdfeatures&AMD_MMX_BIT)!=0;}
122 /// Is fxsave/fxrstor supported?
123 bool hasFxsr() {return (features&FXSR_BIT)!=0;}
124 /// Is cmov supported?
125 bool hasCmov() {return (features&CMOV_BIT)!=0;}
126 /// Is rdtsc supported?
127 bool hasRdtsc() {return (features&TIMESTAMP_BIT)!=0;}
128 /// Is cmpxchg8b supported?
129 bool hasCmpxchg8b() {return (features&CMPXCHG8B_BIT)!=0;}
130 /// Is cmpxchg16b supported?
131 bool hasCmpxchg16b() {return (miscfeatures&CMPXCHG16B_BIT)!=0;}
132 /// Is 3DNow prefetch supported?
133 bool has3dnowPrefetch()
134 {return (amdmiscfeatures&AMD_3DNOW_PREFETCH_BIT)!=0;}
135 /// Are LAHF and SAHF supported in 64-bit mode?
136 bool hasLahfSahf() {return (amdmiscfeatures&LAHFSAHF_BIT)!=0;}
137 /// Is POPCNT supported?
138 bool hasPopcnt() {return (miscfeatures&POPCNT_BIT)!=0;}
139 /// Is LZCNT supported?
140 bool hasLzcnt() {return (amdmiscfeatures&LZCNT_BIT)!=0;}
141 /// Is this an Intel64 or AMD 64?
142 bool isX86_64() {return (amdfeatures&AMD64_BIT)!=0;}
143
144 /// Is this an IA64 (Itanium) processor?
145 bool isItanium() { return (features&IA64_BIT)!=0; }
146
147 /// Is hyperthreading supported?
148 bool hyperThreading() { return maxThreads>maxCores; }
149 /// Returns number of threads per CPU
150 uint threadsPerCPU() {return maxThreads;}
151 /// Returns number of cores in CPU
152 uint coresPerCPU() {return maxCores;}
153
154 /// Optimisation hints for assembly code.
155 /// For forward compatibility, the CPU is compared against different
156 /// microarchitectures. For 32-bit X86, comparisons are made against
157 /// the Intel PPro/PII/PIII/PM family.
158 ///
159 /// The major 32-bit x86 microarchitecture 'dynasties' have been:
160 /// (1) Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2).
161 /// (2) AMD Athlon (K7, K8, K10).
162 /// (3) Intel NetBurst (Pentium 4, Pentium D).
163 /// (4) In-order Pentium (Pentium1, PMMX)
164 /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
165 /// Cyrix, Rise) were mostly in-order.
166 /// Some new processors do not fit into the existing categories:
167 /// Intel Atom 230/330 (family 6, model 0x1C) is an in-order core.
168 /// Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core.
169 ///
170 /// Within each dynasty, the optimisation techniques are largely
171 /// identical (eg, use instruction pairing for group 4). Major
172 /// instruction set improvements occur within each group.
173
174 /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
175 bool preferAthlon() { return probablyAMD && family >=6; }
176 /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
177 bool preferPentium4() { return probablyIntel && family == 0xF; }
178 /// Does this CPU perform better on Pentium I code than Pentium Pro code?
179 bool preferPentium1() { return family < 6 || (family==6 && model < 0xF && !probablyIntel); }
180
181 __gshared:
182 public:
183 /// Processor type (vendor-dependent).
184 /// This should be visible ONLY for display purposes.
185 uint stepping, model, family;
186 uint numCacheLevels = 1;
187 private:
188 bool probablyIntel; // true = _probably_ an Intel processor, might be faking
189 bool probablyAMD; // true = _probably_ an AMD processor
190 char [12] vendorID;
191 char [] processorName;
192 char [48] processorNameBuffer;
193 uint features = 0; // mmx, sse, sse2, hyperthreading, etc
194 uint miscfeatures = 0; // sse3, etc.
195 uint amdfeatures = 0; // 3DNow!, mmxext, etc
196 uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
197 uint maxCores = 1;
198 uint maxThreads = 1;
199 // Note that this may indicate multi-core rather than hyperthreading.
200 bool hyperThreadingBit() { return (features&HTT_BIT)!=0;}
201
202 // feature flags CPUID1_EDX
203 enum : uint
204 {
205 FPU_BIT = 1,
206 TIMESTAMP_BIT = 1<<4, // rdtsc
207 MDSR_BIT = 1<<5, // RDMSR/WRMSR
208 CMPXCHG8B_BIT = 1<<8,
209 CMOV_BIT = 1<<15,
210 MMX_BIT = 1<<23,
211 FXSR_BIT = 1<<24,
212 SSE_BIT = 1<<25,
213 SSE2_BIT = 1<<26,
214 HTT_BIT = 1<<28,
215 IA64_BIT = 1<<30
216 }
217 // feature flags misc CPUID1_ECX
218 enum : uint
219 {
220 SSE3_BIT = 1,
221 PCLMULQDQ_BIT = 1<<1, // from AVX
222 MWAIT_BIT = 1<<3,
223 SSSE3_BIT = 1<<9,
224 FMA_BIT = 1<<12, // from AVX
225 CMPXCHG16B_BIT = 1<<13,
226 SSE41_BIT = 1<<19,
227 SSE42_BIT = 1<<20,
228 POPCNT_BIT = 1<<23,
229 AES_BIT = 1<<25, // AES instructions from AVX
230 OSXSAVE_BIT = 1<<27, // Used for AVX
231 AVX_BIT = 1<<28
232 }
233 /+
234 version(X86_64) {
235 bool hasAVXinHardware() {
236 // This only indicates hardware support, not OS support.
237 return (miscfeatures&AVX_BIT) && (miscfeatures&OSXSAVE_BIT);
238 }
239 // Is AVX supported (in both hardware & OS)?
240 bool Avx() {
241 if (!hasAVXinHardware()) return false;
242 // Check for OS support
243 uint xfeatures;
244 asm {mov ECX, 0; xgetbv; mov xfeatures, EAX; }
245 return (xfeatures&0x6)==6;
246 }
247 bool hasAvxFma() {
248 if (!AVX()) return false;
249 return (features&FMA_BIT)!=0;
250 }
251 }
252 +/
253 // AMD feature flags CPUID80000001_EDX
254 enum : uint
255 {
256 AMD_MMX_BIT = 1<<22,
257 // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
258 FFXSR_BIT = 1<<25,
259 PAGE1GB_BIT = 1<<26, // support for 1GB pages
260 RDTSCP_BIT = 1<<27,
261 AMD64_BIT = 1<<29,
262 AMD_3DNOW_EXT_BIT = 1<<30,
263 AMD_3DNOW_BIT = 1<<31
264 }
265 // AMD misc feature flags CPUID80000001_ECX
266 enum : uint
267 {
268 LAHFSAHF_BIT = 1,
269 LZCNT_BIT = 1<<5,
270 SSE4A_BIT = 1<<6,
271 AMD_3DNOW_PREFETCH_BIT = 1<<8,
272 SSE5_BIT = 1<<11
273 }
274
275 version(GNU){
276 // GDC is a filthy liar. It can't actually do inline asm.
277 } else version(D_InlineAsm_X86) {
278 version = Really_D_InlineAsm_X86;
279 }
280
281 version(Really_D_InlineAsm_X86) {
282 // Note that this code will also work for Itanium in x86 mode.
283
284 shared uint max_cpuid, max_extended_cpuid;
285
// CPUID2: "cache and tlb information"
//
// Executes CPUID leaf 2 and decodes the one-byte descriptors it returns,
// recording the data caches they describe in datacache[].
void getcacheinfoCPUID2()
{
    // Decode one descriptor byte.
    // We are only interested in the data caches; a descriptor that is not
    // listed in the table below leaves datacache[] untouched.
    void decipherCpuid2(ubyte x) {
        if (x==0) return; // null descriptor
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // ids, sizes and ways are parallel tables: entry i of each one
        // belongs to the same descriptor. Sizes are in kilobytes.
        immutable ubyte [47] ids = [
            // level 1 cache
            0x0A, 0x0C, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D
        ];
        immutable uint [47] sizes = [
            8, 16, 32, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024
        ];
        // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        immutable ubyte [47] ways = [
            2, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16
        ];
        // Table index of the first level 2 and first level 3 descriptor.
        enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 }
        for (int i=0; i< ids.length; ++i) {
            if (x!=ids[i]) continue;
            // level is the 0-based index into datacache[].
            int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
            // On family 0xF model 6, descriptor 0x49 is filed as level 3.
            if (x==0x49 && family==0xF && model==0x6) level=2;
            datacache[level].size=sizes[i];
            datacache[level].associativity=ways[i];
            // All level 3 descriptors (index 2, since level is 0-based) and
            // the listed level 1/2 descriptors have 64-byte lines.
            if (level == 2 || x==0x2C || (x>=0x48 && x<=0x80)
                || x==0x86 || x==0x87
                || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                datacache[level].lineSize = 64;
            } else datacache[level].lineSize = 32;
            break; // descriptor ids are unique, nothing more to find
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        asm {
            mov EAX, 2;
            cpuid;
            mov a, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                // These are NOT standard Intel values
                // (TLB = 32 entry, 4 way associative, 4K pages)
                // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A count of zero would make the --numinfos below wrap around
            // to uint.max; treat it as a single iteration instead.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
376
// CPUID4: "Deterministic cache parameters" leaf
//
// Queries CPUID leaf 4 once per cache (sub-leaf in ECX) until the CPU
// reports no more caches. Data and unified caches are recorded in
// datacache[]; maxCores is raised to the largest core count seen.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for(;;) {
        uint a, b, number_of_sets;
        asm {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        uint cachetype = a & 0x1F;
        if (cachetype==0) break; // no more caches
        uint numthreads = ((a>>14) & 0xFFF) + 1;
        uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > maxCores) maxCores = numcores;
        if (cachetype!=1 && cachetype!=3) continue; // we only want data & unified caches

        ++number_of_sets; // the register holds (sets - 1)
        ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Valid indices are 0 .. datacache.length-1. A level field of 0
        // wraps to 255 in the cast above and is rejected here as well.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        // For a fully associative cache (associativity == ubyte.max) the
        // associativity factor is left out. Computed in 64 bits throughout.
        ulong sz = number_of_sets;
        if (datacache[level].associativity < ubyte.max)
            sz *= datacache[level].associativity;
        datacache[level].size = cast(uint)(
            (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cachetype==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
416
// CPUID8000_0005 & 6
//
// Fills datacache[0] from extended leaf 0x8000_0005 and, when the CPU
// supports leaf 0x8000_0006, datacache[1] and datacache[2] as well.
// Also raises maxCores if leaf 0x8000_0008 reports more cores.
void getAMDcacheinfo()
{
    uint c5, c6, d6;
    asm {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as uint: incrementing a ubyte read as 0xFF would wrap to 0
        // and the L3 size below would then be divided by zero.
        uint numcores = 1;
        if (max_extended_cpuid >=0x8000_0008) {
            ubyte nc;
            asm {
                mov EAX, 0x8000_0008;
                cpuid;
                mov nc, CL;
            }
            numcores = nc + 1u;
            if (numcores>maxCores) maxCores = numcores;
        }
        asm {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit associativity code of leaf 0x8000_0006 to a number
        // of ways; 0xFF (== ubyte.max) stands for fully associative.
        immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
464
465
// Identify an x86 CPU using the cpuid instruction.
//
// Fills in the module-level state: vendorID, processorName, the feature
// words, stepping/model/family, maxCores/maxThreads and datacache[].
// Must only be called when hasCPUID() returned true.
void cpuidX86()
{
    char * venptr = vendorID.ptr;
    asm {
        mov EAX, 0;
        cpuid;
        mov max_cpuid, EAX;
        mov EAX, venptr;
        mov [EAX], EBX;
        mov [EAX + 4], EDX;
        mov [EAX + 8], ECX;
        mov EAX, 0x8000_0000;
        cpuid;
        mov max_extended_cpuid, EAX;
    }

    probablyIntel = vendorID == "GenuineIntel";
    probablyAMD = vendorID == "AuthenticAMD";
    uint a, c;
    uint apic = 0; // brand index, apic id
    asm {
        mov EAX, 1; // model, stepping
        cpuid;
        mov a, EAX;
        mov apic, EBX;
        mov miscfeatures, ECX;
        mov features, EDX;
    }
    amdfeatures = 0;
    amdmiscfeatures = 0;
    if (max_extended_cpuid >= 0x8000_0001) {
        asm {
            mov EAX, 0x8000_0001;
            cpuid;
            mov amdmiscfeatures, ECX;
            mov amdfeatures, EDX;
        }
    }
    // Try to detect fraudulent vendorIDs
    if (amd3dnow()) probablyIntel = false;

    stepping = a & 0xF;
    uint fbase = (a >> 8) & 0xF;
    uint mbase = (a >> 4) & 0xF;
    // The extended family/model fields are only added for the base values
    // tested below. Parenthesised explicitly: '+' binds tighter than '&'.
    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
    model = ((fbase == 0xF) || (fbase == 6 && probablyIntel) ) ?
        mbase + ((a >> 12) & 0xF0) : mbase;

    if (!probablyIntel && max_extended_cpuid >= 0x8000_0008) {
        // determine max number of cores for AMD
        asm {
            mov EAX, 0x8000_0008;
            cpuid;
            mov c, ECX;
        }
        uint apicsize = (c>>12) & 0xF;
        if (apicsize == 0) {
            // use legacy method
            // ECX[7:0] holds the number of cores minus one (the same
            // convention getAMDcacheinfo uses), so add one to get the count.
            if (hyperThreadingBit()) maxCores = (c & 0xFF) + 1;
            else maxCores = 1;
        } else {
            // maxcores = 2^ apicsize
            maxCores = 1;
            while (apicsize) { maxCores<<=1; --apicsize; }
        }
    }

    if (max_extended_cpuid >= 0x8000_0004) {
        char *procptr = processorNameBuffer.ptr;
        asm {
            push ESI;
            mov ESI, procptr;
            mov EAX, 0x8000_0002;
            cpuid;
            mov [ESI], EAX;
            mov [ESI+4], EBX;
            mov [ESI+8], ECX;
            mov [ESI+12], EDX;
            mov EAX, 0x8000_0003;
            cpuid;
            mov [ESI+16], EAX;
            mov [ESI+20], EBX;
            mov [ESI+24], ECX;
            mov [ESI+28], EDX;
            mov EAX, 0x8000_0004;
            cpuid;
            mov [ESI+32], EAX;
            mov [ESI+36], EBX;
            mov [ESI+40], ECX;
            mov [ESI+44], EDX;
            pop ESI;
        }
        // Intel P4 and PM pad at front with spaces.
        // Other CPUs pad at end with nulls.
        // Both scans are bounded so that a name consisting only of padding
        // yields an empty slice instead of running off the buffer.
        size_t start = 0, end = 0;
        while (start < processorNameBuffer.length
               && processorNameBuffer[start] == ' ') { ++start; }
        while (end < processorNameBuffer.length - start
               && processorNameBuffer[$-end-1] == 0) { ++end; }
        processorName = processorNameBuffer[start..$-end];
    } else {
        // processorName is still an empty slice here, so the text has to be
        // copied into the backing buffer first and then sliced.
        enum unknownName = "Unknown CPU";
        processorNameBuffer[0 .. unknownName.length] = unknownName;
        processorName = processorNameBuffer[0 .. unknownName.length];
    }
    // Determine cache sizes

    // Intel docs specify that they return 0 for 0x8000_0005.
    // AMD docs do not specify the behaviour for 0004 and 0002.
    // Centaur/VIA and most other manufacturers use the AMD method,
    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
    // for CPUID80000005. But Geode GX uses the AMD method

    // Deal with Geode GX1 - make it same as MediaGX MMX.
    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
        max_extended_cpuid = 0x8000_0004;
    }
    // Therefore, we try the AMD method unless it's an Intel chip.
    // If we still have no info, try the Intel methods.
    // datacache[0].size == 0 is used below as the "nothing found yet" marker.
    datacache[0].size = 0;
    if (max_cpuid<2 || !probablyIntel) {
        if (max_extended_cpuid >= 0x8000_0005) {
            getAMDcacheinfo();
        } else if (probablyAMD) {
            // According to AMDProcRecognitionAppNote, this means CPU
            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
            // Am5x86 has 16Kb 4-way unified data & code cache.
            datacache[0].size = 8;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else {
            // Some obscure CPU.
            // Values for Cyrix 6x86MX (family 6, model 0)
            datacache[0].size = 64;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        }
    }
    if ((datacache[0].size == 0) && max_cpuid>=4) {
        getcacheinfoCPUID4();
    }
    if ((datacache[0].size == 0) && max_cpuid>=2) {
        getcacheinfoCPUID2();
    }
    if (datacache[0].size == 0) {
        // Pentium, PMMX, late model 486, or an obscure CPU
        if (mmx()) { // Pentium MMX. Also has 8kB code cache.
            datacache[0].size = 16;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else { // Pentium 1 (which also has 8kB code cache)
            // or 486.
            // Cyrix 6x86: 16, 4way, 32 linesize
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
        }
    }
    if (hyperThreadingBit()) maxThreads = (apic>>>16) & 0xFF;
    else maxThreads = maxCores;
}
624
// Return true if the cpuid instruction is supported.
// The test tries to flip bit 21 of EFLAGS (mask 0x0020_0000) and reports
// whether the flip was retained.
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
bool hasCPUID()
{
    uint changed;
    asm {
        pushfd;
        pop EAX;
        mov changed, EAX;       // EFLAGS as found
        xor EAX, 0x0020_0000;   // toggle bit 21
        push EAX;
        popfd;                  // try to write the toggled value back
        pushfd;
        pop EAX;                // EFLAGS as it reads now
        xor changed, EAX;       // set bits = bits that differ
    }
    return (changed & 0x0020_0000) !=0;
}
643
644 } else { // inline asm X86
645
646 bool hasCPUID() { return false; }
647
648 void cpuidX86()
649 {
650 datacache[0].size = 8;
651 datacache[0].associativity = 2;
652 datacache[0].lineSize = 32;
653 }
654 }
655
// TODO: Implement this function with OS support
//
// Fills datacache[0..3] from fixed per-model tables. The model is
// currently hard-wired to PPC603 (see the TODO below).
void cpuidPPC()
{
    enum :int { PPC601, PPC603, PPC603E, PPC604,
        PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }

    // TODO:
    // asm { mfpvr; } returns the CPU version but unfortunately it can
    // only be used in kernel mode. So OS support is required.
    int cputype = PPC603;

    // Tables indexed by the enum above.
    // 601 has a 8KB combined data & code L1 cache.
    uint[] l1Size = [4, 8, 16, 16, 32, 32, 32, 32, 64];
    ubyte[] l1Ways = [8, 2, 4, 4, 4, 8, 8, 8, 8];
    uint[] l2Size = [0, 0, 0, 0, 0, 0, 0, 256, 512];
    uint[] l3Size = [0, 0, 0, 0, 0, 0, 0, 2048, 0];

    // One line size is used for all three levels.
    uint line;
    if (cputype == PPCG5) {
        line = 128;
    } else if (cputype == PPC620 || cputype == PPCG3) {
        line = 64;
    } else {
        line = 32;
    }

    datacache[0].size = l1Size[cputype];
    datacache[0].associativity = l1Ways[cputype];
    datacache[0].lineSize = line;

    datacache[1].size = l2Size[cputype];
    datacache[1].lineSize = line;

    datacache[2].size = l3Size[cputype];
    datacache[2].lineSize = line;
}
682
683 // TODO: Implement this function with OS support
684 void cpuidSparc()
685 {
686 // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
687 // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
688 // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
689 // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024.
690 // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
691 // Sparc64V : L1 = 128, 2way. L2 = 4096 4way.
692 }
693
694
// Module constructor: probes the CPU once at startup and makes sure every
// entry of datacache[] holds usable values afterwards.
static this()
{
    // Without cpuid it's a 386 or 486, or a Cyrix 6x86.
    // Probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    if (datacache[0].size==0) {
        // Nothing was detected: guess same as Pentium 1.
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Level 0 always counts; each higher level that was detected raises
    // the count to its index + 1.
    numCacheLevels = 1;
    for (int i=1; i< datacache.length; ++i) {
        if (datacache[i].size!=0) {
            numCacheLevels = i+1;
            continue;
        }
        // Unused level: make it look like the full address space,
        // inheriting the line size of the level below it.
        datacache[i].size = uint.max/1024;
        datacache[i].associativity = 1;
        datacache[i].lineSize = datacache[i-1].lineSize;
    }
}