Mercurial > projects > ldc
diff druntime/src/compiler/dmd/util/cpuid.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/druntime/src/compiler/dmd/util/cpuid.d Tue Jun 02 17:43:06 2009 +0100 @@ -0,0 +1,719 @@ +/** + * Identify the characteristics of the host CPU, providing information + * about cache sizes and assembly optimisation hints. + * + * Some of this information was extremely difficult to track down. Some of the + * documents below were found only in cached versions stored by search engines! + * This code relies on information found in: + + * - "Intel(R) 64 and IA-32 Architectures Software Developers Manual, + * Volume 2A: Instruction Set Reference, A-M" (2007). + * - "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008). + * - "AMD Processor Recognition Application Note For Processors Prior to AMD + * Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005). + * - "AMD Geode(TM) GX Processors Data Book", + * Advanced Micro Devices, Publication ID 31505E, (2005). + * - "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000). + * - "Application note 106: Software Customization for the 6x86 Family", + * Cyrix Corporation, Rev 1.5 (1998) + * - http://ftp.intron.ac/pub/document/cpu/cpuid.htm + * - "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution", + * National Semiconductor, (2002) + * - "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008). + * - http://www.sandpile.org/ia32/cpuid.htm + * - http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html + * - "What every programmer should know about memory", + * Ulrich Depper, Red Hat, Inc., (2007). + * + * Bugs: Currently only works on x86 and Itanium CPUs. + * Many processors have bugs in their microcode for the CPUID instruction, + * so sometimes the cache information may be incorrect. + * + * Copyright: Copyright Don Clugston 2007 - 2009. + * License: <a href="http://www.boost.org/LICENSE_1_0.txt>Boost License 1.0</a>. + * Authors: Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk> + * + * Copyright Don Clugston 2007 - 2009. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ +module rt.util.cpuid; + +// If optimizing for a particular processor, it is generally better +// to identify based on features rather than model. NOTE: Normally +// it's only worthwhile to optimise for the latest Intel and AMD CPU, +// with a backup for other CPUs. +// Pentium -- preferPentium1() +// PMMX -- + mmx() +// PPro -- default +// PII -- + mmx() +// PIII -- + mmx() + sse() +// PentiumM -- + mmx() + sse() + sse2() +// Pentium4 -- preferPentium4() +// PentiumD -- + isX86_64() +// Core2 -- default + isX86_64() +// AMD K5 -- preferPentium1() +// AMD K6 -- + mmx() +// AMD K6-II -- + mmx() + 3dnow() +// AMD K7 -- preferAthlon() +// AMD K8 -- + sse2() +// AMD K10 -- + isX86_64() +// Cyrix 6x86 -- preferPentium1() +// 6x86MX -- + mmx() + +public: + +/// Cache size and behaviour +struct CacheInfo +{ + /// Size of the cache, in kilobytes, per CPU. + /// For L1 unified (data + code) caches, this size is half the physical size. + /// (we don't halve it for larger sizes, since normally + /// data size is much greater than code size for critical loops). + uint size; + /// Number of ways of associativity, eg: + /// 1 = direct mapped + /// 2 = 2-way set associative + /// 3 = 3-way set associative + /// ubyte.max = fully associative + ubyte associativity; + /// Number of bytes read into the cache when a cache miss occurs. + uint lineSize; +} + +public: + /// Returns vendor string, for display purposes only. + /// Do NOT use this to determine features! + /// Note that some CPUs have programmable vendorIDs. + char[] vendor() {return vendorID;} + /// Returns processor string, for display purposes only + char[] processor() {return processorName;} + + /// The data caches. If there are fewer than 5 physical caches levels, + /// the remaining levels are set to uint.max (== entire memory space) + __gshared CacheInfo[5] datacache; + /// Does it have an x87 FPU on-chip? + bool x87onChip() {return (features&FPU_BIT)!=0;} + /// Is MMX supported? + bool mmx() {return (features&MMX_BIT)!=0;} + /// Is SSE supported? + bool sse() {return (features&SSE_BIT)!=0;} + /// Is SSE2 supported? + bool sse2() {return (features&SSE2_BIT)!=0;} + /// Is SSE3 supported? + bool sse3() {return (miscfeatures&SSE3_BIT)!=0;} + /// Is SSSE3 supported? + bool ssse3() {return (miscfeatures&SSSE3_BIT)!=0;} + /// Is SSE4.1 supported? + bool sse41() {return (miscfeatures&SSE41_BIT)!=0;} + /// Is SSE4.2 supported? + bool sse42() {return (miscfeatures&SSE42_BIT)!=0;} + /// Is SSE4a supported? + bool sse4a() {return (amdmiscfeatures&SSE4A_BIT)!=0;} + /// Is SSE5 supported? + bool sse5() {return (amdmiscfeatures&SSE5_BIT)!=0;} + /// Is AMD 3DNOW supported? + bool amd3dnow() {return (amdfeatures&AMD_3DNOW_BIT)!=0;} + /// Is AMD 3DNOW Ext supported? + bool amd3dnowExt() {return (amdfeatures&AMD_3DNOW_EXT_BIT)!=0;} + /// Are AMD extensions to MMX supported? + bool amdMmx() {return (amdfeatures&AMD_MMX_BIT)!=0;} + /// Is fxsave/fxrstor supported? + bool hasFxsr() {return (features&FXSR_BIT)!=0;} + /// Is cmov supported? + bool hasCmov() {return (features&CMOV_BIT)!=0;} + /// Is rdtsc supported? + bool hasRdtsc() {return (features&TIMESTAMP_BIT)!=0;} + /// Is cmpxchg8b supported? + bool hasCmpxchg8b() {return (features&CMPXCHG8B_BIT)!=0;} + /// Is cmpxchg8b supported? + bool hasCmpxchg16b() {return (miscfeatures&CMPXCHG16B_BIT)!=0;} + /// Is 3DNow prefetch supported? + bool has3dnowPrefetch() + {return (amdmiscfeatures&AMD_3DNOW_PREFETCH_BIT)!=0;} + /// Are LAHF and SAHF supported in 64-bit mode? + bool hasLahfSahf() {return (amdmiscfeatures&LAHFSAHF_BIT)!=0;} + /// Is POPCNT supported? + bool hasPopcnt() {return (miscfeatures&POPCNT_BIT)!=0;} + /// Is LZCNT supported? + bool hasLzcnt() {return (amdmiscfeatures&LZCNT_BIT)!=0;} + /// Is this an Intel64 or AMD 64? + bool isX86_64() {return (amdfeatures&AMD64_BIT)!=0;} + + /// Is this an IA64 (Itanium) processor? + bool isItanium() { return (features&IA64_BIT)!=0; } + + /// Is hyperthreading supported? + bool hyperThreading() { return maxThreads>maxCores; } + /// Returns number of threads per CPU + uint threadsPerCPU() {return maxThreads;} + /// Returns number of cores in CPU + uint coresPerCPU() {return maxCores;} + + /// Optimisation hints for assembly code. + /// For forward compatibility, the CPU is compared against different + /// microarchitectures. For 32-bit X86, comparisons are made against + /// the Intel PPro/PII/PIII/PM family. + /// + /// The major 32-bit x86 microarchitecture 'dynasties' have been: + /// (1) Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). + /// (2) AMD Athlon (K7, K8, K10). + /// (3) Intel NetBurst (Pentium 4, Pentium D). + /// (4) In-order Pentium (Pentium1, PMMX) + /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta, + /// Cyrix, Rise) were mostly in-order. + /// Some new processors do not fit into the existing categories: + /// Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. + /// Centaur Isiah = VIA Nano (family 6, model F) is an out-of-order core. + /// + /// Within each dynasty, the optimisation techniques are largely + /// identical (eg, use instruction pairing for group 4). Major + /// instruction set improvements occur within each group. + + /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code? + bool preferAthlon() { return probablyAMD && family >=6; } + /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code? + bool preferPentium4() { return probablyIntel && family == 0xF; } + /// Does this CPU perform better on Pentium I code than Pentium Pro code? + bool preferPentium1() { return family < 6 || (family==6 && model < 0xF && !probablyIntel); } + +__gshared: +public: + /// Processor type (vendor-dependent). + /// This should be visible ONLY for display purposes. + uint stepping, model, family; + uint numCacheLevels = 1; +private: + bool probablyIntel; // true = _probably_ an Intel processor, might be faking + bool probablyAMD; // true = _probably_ an AMD processor + char [12] vendorID; + char [] processorName; + char [48] processorNameBuffer; + uint features = 0; // mmx, sse, sse2, hyperthreading, etc + uint miscfeatures = 0; // sse3, etc. + uint amdfeatures = 0; // 3DNow!, mmxext, etc + uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc + uint maxCores = 1; + uint maxThreads = 1; + // Note that this may indicate multi-core rather than hyperthreading. + bool hyperThreadingBit() { return (features&HTT_BIT)!=0;} + + // feature flags CPUID1_EDX + enum : uint + { + FPU_BIT = 1, + TIMESTAMP_BIT = 1<<4, // rdtsc + MDSR_BIT = 1<<5, // RDMSR/WRMSR + CMPXCHG8B_BIT = 1<<8, + CMOV_BIT = 1<<15, + MMX_BIT = 1<<23, + FXSR_BIT = 1<<24, + SSE_BIT = 1<<25, + SSE2_BIT = 1<<26, + HTT_BIT = 1<<28, + IA64_BIT = 1<<30 + } + // feature flags misc CPUID1_ECX + enum : uint + { + SSE3_BIT = 1, + PCLMULQDQ_BIT = 1<<1, // from AVX + MWAIT_BIT = 1<<3, + SSSE3_BIT = 1<<9, + FMA_BIT = 1<<12, // from AVX + CMPXCHG16B_BIT = 1<<13, + SSE41_BIT = 1<<19, + SSE42_BIT = 1<<20, + POPCNT_BIT = 1<<23, + AES_BIT = 1<<25, // AES instructions from AVX + OSXSAVE_BIT = 1<<27, // Used for AVX + AVX_BIT = 1<<28 + } +/+ +version(X86_64) { + bool hasAVXinHardware() { + // This only indicates hardware support, not OS support. + return (miscfeatures&AVX_BIT) && (miscfeatures&OSXSAVE_BIT); + } + // Is AVX supported (in both hardware & OS)? + bool Avx() { + if (!hasAVXinHardware()) return false; + // Check for OS support + uint xfeatures; + asm {mov ECX, 0; xgetbv; mov xfeatures, EAX; } + return (xfeatures&0x6)==6; + } + bool hasAvxFma() { + if (!AVX()) return false; + return (features&FMA_BIT)!=0; + } +} ++/ + // AMD feature flags CPUID80000001_EDX + enum : uint + { + AMD_MMX_BIT = 1<<22, +// FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions. + FFXSR_BIT = 1<<25, + PAGE1GB_BIT = 1<<26, // support for 1GB pages + RDTSCP_BIT = 1<<27, + AMD64_BIT = 1<<29, + AMD_3DNOW_EXT_BIT = 1<<30, + AMD_3DNOW_BIT = 1<<31 + } + // AMD misc feature flags CPUID80000001_ECX + enum : uint + { + LAHFSAHF_BIT = 1, + LZCNT_BIT = 1<<5, + SSE4A_BIT = 1<<6, + AMD_3DNOW_PREFETCH_BIT = 1<<8, + SSE5_BIT = 1<<11 + } + +version(GNU){ + // GDC is a filthy liar. It can't actually do inline asm. +} else version(D_InlineAsm_X86) { + version = Really_D_InlineAsm_X86; +} + +version(Really_D_InlineAsm_X86) { +// Note that this code will also work for Itanium in x86 mode. + +shared uint max_cpuid, max_extended_cpuid; + +// CPUID2: "cache and tlb information" +void getcacheinfoCPUID2() +{ + // We are only interested in the data caches + void decipherCpuid2(ubyte x) { + if (x==0) return; + // Values from http://www.sandpile.org/ia32/cpuid.htm. + // Includes Itanium and non-Intel CPUs. + // + immutable ubyte [47] ids = [ + 0x0A, 0x0C, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68, + // level 2 cache + 0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F, + 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E, + 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81, + // level 3 cache + 0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D + ]; + immutable uint [47] sizes = [ + 8, 16, 32, 16, 24, 8, 16, 32, + 128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512, + 256, 512, 1024, 2048, 512, 1024, 4096, 6*1024, + 128, 192, 128, 256, 384, 512, 3072, 512, 128, + 512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024 + ]; + // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative + immutable ubyte [47] ways = [ + 2, 4, 8, 8, 6, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2, + 8, 8, 8, 8, 4, 8, 16, 24, + 4, 6, 2, 4, 6, 4, 12, 8, 8, + 4, 8, 8, 8, 4, 8, 12, 16, 12, 16 + ]; + enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 } + for (int i=0; i< ids.length; ++i) { + if (x==ids[i]) { + int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2; + if (x==0x49 && family==0xF && model==0x6) level=2; + datacache[level].size=sizes[i]; + datacache[level].associativity=ways[i]; + if (level == 3 || x==0x2C || (x>=0x48 && x<=0x80) + || x==0x86 || x==0x87 + || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){ + datacache[level].lineSize = 64; + } else datacache[level].lineSize = 32; + } + } + } + + uint[4] a; + bool firstTime = true; + // On a multi-core system, this could theoretically fail, but it's only used + // for old single-core CPUs. + uint numinfos = 1; + do { + asm { + mov EAX, 2; + cpuid; + mov a, EAX; + mov a+4, EBX; + mov a+8, ECX; + mov a+12, EDX; + } + if (firstTime) { + if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) { + // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080. + // These are NOT standard Intel values + // (TLB = 32 entry, 4 way associative, 4K pages) + // (L1 cache = 16K, 4way, linesize16) + datacache[0].size=8; + datacache[0].associativity=4; + datacache[0].lineSize=16; + return; + } + // lsb of a is how many times to loop. + numinfos = a[0] & 0xFF; + // and otherwise it should be ignored + a[0] &= 0xFFFF_FF00; + firstTime = false; + } + for (int c=0; c<4;++c) { + // high bit set == no info. + if (a[c] & 0x8000_0000) continue; + decipherCpuid2(cast(ubyte)(a[c] & 0xFF)); + decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF)); + decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF)); + decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF)); + } + } while (--numinfos); +} + +// CPUID4: "Deterministic cache parameters" leaf +void getcacheinfoCPUID4() +{ + int cachenum = 0; + for(;;) { + uint a, b, number_of_sets; + asm { + mov EAX, 4; + mov ECX, cachenum; + cpuid; + mov a, EAX; + mov b, EBX; + mov number_of_sets, ECX; + } + ++cachenum; + if ((a&0x1F)==0) break; // no more caches + uint numthreads = ((a>>14) & 0xFFF) + 1; + uint numcores = ((a>>26) & 0x3F) + 1; + if (numcores > maxCores) maxCores = numcores; + if ((a&0x1F)!=1 && ((a&0x1F)!=3)) continue; // we only want data & unified caches + + ++number_of_sets; + ubyte level = cast(ubyte)(((a>>5)&7)-1); + if (level > datacache.length) continue; // ignore deep caches + datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1); + datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size + uint line_partitions = ((b >> 12)& 0x3FF) + 1; + // Size = number of sets * associativity * cachelinesize * linepartitions + // and must convert to Kb, also dividing by the number of hyperthreads using this cache. + ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets * + datacache[level].associativity : number_of_sets; + datacache[level].size = cast(uint)( + (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024)); + if (level == 0 && (a&0xF)==3) { + // Halve the size for unified L1 caches + datacache[level].size/=2; + } + } +} + +// CPUID8000_0005 & 6 +void getAMDcacheinfo() +{ + uint c5, c6, d6; + asm { + mov EAX, 0x8000_0005; // L1 cache + cpuid; + // EAX has L1_TLB_4M. + // EBX has L1_TLB_4K + // EDX has L1 instruction cache + mov c5, ECX; + } + + datacache[0].size = ( (c5>>24) & 0xFF); + datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF); + datacache[0].lineSize = c5 & 0xFF; + + if (max_extended_cpuid >= 0x8000_0006) { + // AMD K6-III or K6-2+ or later. + ubyte numcores = 1; + if (max_extended_cpuid >=0x8000_0008) { + asm { + mov EAX, 0x8000_0008; + cpuid; + mov numcores, CL; + } + ++numcores; + if (numcores>maxCores) maxCores = numcores; + } + asm { + mov EAX, 0x8000_0006; // L2/L3 cache + cpuid; + mov c6, ECX; // L2 cache info + mov d6, EDX; // L3 cache info + } + + immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ]; + datacache[1].size = (c6>>16) & 0xFFFF; + datacache[1].associativity = assocmap[(c6>>12)&0xF]; + datacache[1].lineSize = c6 & 0xFF; + + // The L3 cache value is TOTAL, not per core. + datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1. + datacache[2].associativity = assocmap[(d6>>12)&0xF]; + datacache[2].lineSize = d6 & 0xFF; + } +} + + +void cpuidX86() +{ + char * venptr = vendorID.ptr; + asm { + mov EAX, 0; + cpuid; + mov max_cpuid, EAX; + mov EAX, venptr; + mov [EAX], EBX; + mov [EAX + 4], EDX; + mov [EAX + 8], ECX; + mov EAX, 0x8000_0000; + cpuid; + mov max_extended_cpuid, EAX; + } + + probablyIntel = vendorID == "GenuineIntel"; + probablyAMD = vendorID == "AuthenticAMD"; + uint a, b, c, d; + uint apic = 0; // brand index, apic id + asm { + mov EAX, 1; // model, stepping + cpuid; + mov a, EAX; + mov apic, EBX; + mov miscfeatures, ECX; + mov features, EDX; + } + amdfeatures = 0; + amdmiscfeatures = 0; + if (max_extended_cpuid >= 0x8000_0001) { + asm { + mov EAX, 0x8000_0001; + cpuid; + mov amdmiscfeatures, ECX; + mov amdfeatures, EDX; + } + } + // Try to detect fraudulent vendorIDs + if (amd3dnow) probablyIntel = false; + + stepping = a & 0xF; + uint fbase = (a >> 8) & 0xF; + uint mbase = (a >> 4) & 0xF; + family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase; + model = ((fbase == 0xF) || (fbase == 6 && probablyIntel) ) ? + mbase + ((a >> 12) & 0xF0) : mbase; + + if (!probablyIntel && max_extended_cpuid >= 0x8000_0008) { + // determine max number of cores for AMD + asm { + mov EAX, 0x8000_0008; + cpuid; + mov c, ECX; + } + uint apicsize = (c>>12) & 0xF; + if (apicsize == 0) { + // use legacy method + if (hyperThreadingBit) maxCores = c & 0xFF; + else maxCores = 1; + } else { + // maxcores = 2^ apicsize + maxCores = 1; + while (apicsize) { maxCores<<=1; --apicsize; } + } + } + + if (max_extended_cpuid >= 0x8000_0004) { + char *procptr = processorNameBuffer.ptr; + asm { + push ESI; + mov ESI, procptr; + mov EAX, 0x8000_0002; + cpuid; + mov [ESI], EAX; + mov [ESI+4], EBX; + mov [ESI+8], ECX; + mov [ESI+12], EDX; + mov EAX, 0x8000_0003; + cpuid; + mov [ESI+16], EAX; + mov [ESI+20], EBX; + mov [ESI+24], ECX; + mov [ESI+28], EDX; + mov EAX, 0x8000_0004; + cpuid; + mov [ESI+32], EAX; + mov [ESI+36], EBX; + mov [ESI+40], ECX; + mov [ESI+44], EDX; + pop ESI; + } + // Intel P4 and PM pad at front with spaces. + // Other CPUs pad at end with nulls. + int start = 0, end = 0; + while (processorNameBuffer[start] == ' ') { ++start; } + while (processorNameBuffer[$-end-1] == 0) { ++end; } + processorName = processorNameBuffer[start..$-end]; + } else { + processorName[] = "Unknown CPU"; + } + // Determine cache sizes + + // Intel docs specify that they return 0 for 0x8000_0005. + // AMD docs do not specify the behaviour for 0004 and 0002. + // Centaur/VIA and most other manufacturers use the AMD method, + // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2! + // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour + // for CPUID80000005. But Geode GX uses the AMD method + + // Deal with Geode GX1 - make it same as MediaGX MMX. + if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) { + max_extended_cpuid = 0x8000_0004; + } + // Therefore, we try the AMD method unless it's an Intel chip. + // If we still have no info, try the Intel methods. + datacache[0].size = 0; + if (max_cpuid<2 || !probablyIntel) { + if (max_extended_cpuid >= 0x8000_0005) { + getAMDcacheinfo(); + } else if (probablyAMD) { + // According to AMDProcRecognitionAppNote, this means CPU + // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4) + // Am5x86 has 16Kb 4-way unified data & code cache. + datacache[0].size = 8; + datacache[0].associativity = 4; + datacache[0].lineSize = 32; + } else { + // Some obscure CPU. + // Values for Cyrix 6x86MX (family 6, model 0) + datacache[0].size = 64; + datacache[0].associativity = 4; + datacache[0].lineSize = 32; + } + } + if ((datacache[0].size == 0) && max_cpuid>=4) { + getcacheinfoCPUID4(); + } + if ((datacache[0].size == 0) && max_cpuid>=2) { + getcacheinfoCPUID2(); + } + if (datacache[0].size == 0) { + // Pentium, PMMX, late model 486, or an obscure CPU + if (mmx) { // Pentium MMX. Also has 8kB code cache. + datacache[0].size = 16; + datacache[0].associativity = 4; + datacache[0].lineSize = 32; + } else { // Pentium 1 (which also has 8kB code cache) + // or 486. + // Cyrix 6x86: 16, 4way, 32 linesize + datacache[0].size = 8; + datacache[0].associativity = 2; + datacache[0].lineSize = 32; + } + } + if (hyperThreadingBit) maxThreads = (apic>>>16) & 0xFF; + else maxThreads = maxCores; +} + +// Return true if the cpuid instruction is supported. +// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines. +bool hasCPUID() +{ + uint flags; + asm { + pushfd; + pop EAX; + mov flags, EAX; + xor EAX, 0x0020_0000; + push EAX; + popfd; + pushfd; + pop EAX; + xor flags, EAX; + } + return (flags & 0x0020_0000) !=0; +} + +} else { // inline asm X86 + + bool hasCPUID() { return false; } + + void cpuidX86() + { + datacache[0].size = 8; + datacache[0].associativity = 2; + datacache[0].lineSize = 32; + } +} + +// TODO: Implement this function with OS support +void cpuidPPC() +{ + enum :int { PPC601, PPC603, PPC603E, PPC604, + PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }; + + // TODO: + // asm { mfpvr; } returns the CPU version but unfortunately it can + // only be used in kernel mode. So OS support is required. + int cputype = PPC603; + + // 601 has a 8KB combined data & code L1 cache. + uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64]; + ubyte ways[] = [8, 2, 4, 4, 4, 8, 8, 8, 8]; + uint L2size[]= [0, 0, 0, 0, 0, 0, 0, 256, 512]; + uint L3size[]= [0, 0, 0, 0, 0, 0, 0, 2048, 0]; + + datacache[0].size = sizes[cputype]; + datacache[0].associativity = ways[cputype]; + datacache[0].lineSize = (cputype==PPCG5)? 128 : + (cputype == PPC620 || cputype == PPCG3)? 64 : 32; + datacache[1].size = L2size[cputype]; + datacache[2].size = L3size[cputype]; + datacache[1].lineSize = datacache[0].lineSize; + datacache[2].lineSize = datacache[0].lineSize; +} + +// TODO: Implement this function with OS support +void cpuidSparc() +{ + // UltaSparcIIi : L1 = 16, 2way. L2 = 512, 4 way. + // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192. + // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way + // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024. + // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024. + // Sparc64V : L1 = 128, 2way. L2 = 4096 4way. +} + + +static this() +{ + if (hasCPUID()) { + cpuidX86(); + } else { + // it's a 386 or 486, or a Cyrix 6x86. + //Probably still has an external cache. + } + if (datacache[0].size==0) { + // Guess same as Pentium 1. + datacache[0].size = 8; + datacache[0].associativity = 2; + datacache[0].lineSize = 32; + } + numCacheLevels = 1; + // And now fill up all the unused levels with full memory space. + for (int i=1; i< datacache.length; ++i) { + if (datacache[i].size==0) { + // Set all remaining levels of cache equal to full address space. + datacache[i].size = uint.max/1024; + datacache[i].associativity = 1; + datacache[i].lineSize = datacache[i-1].lineSize; + } else numCacheLevels = i+1; + } +}