# HG changeset patch # User Anders Johnsen # Date 1218557974 -7200 # Node ID e0551773a005ed2570bc2b0813bc7457d382cf8e # Parent d3c148ca429b86498eba8f68f7b8fa1445b1d895 Added the correct version. diff -r d3c148ca429b -r e0551773a005 src/basic/Attribute.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/basic/Attribute.d Tue Aug 12 18:19:34 2008 +0200 @@ -0,0 +1,128 @@ +module basic.Attribute; + +import tango.core.BitManip; +import Integer = tango.text.convert.Integer; + +import tango.io.Stdout; + +enum Protection : uint +{ + Private = 1<<0, + Public = 1<<1, + Package = 1<<2, + Protected = 1<<3, + Export = 1<<4, +} + +enum Extern : uint +{ + C = 1<<12, + CPlusPlus = 1<<13, + D = 1<<14, + Windows = 1<<15, + Pascal = 1<<16, + System = 1<<17, +} + +struct Attribute +{ + + void setProtection(Protection p) + { + att &= 0xFFFFFFE0; + att |= p; + } + + Protection getProtection() + { + Protection p = Protection.Public; + switch(bsf(0xF0000000 | att)) + { + case 0: + p = Protection.Private; + break; + case 1: + p = Protection.Public; + break; + case 2: + p = Protection.Package; + break; + case 3: + p = Protection.Protected; + break; + case 4: + p = Protection.Export; + break; + default: + break; + } + return p; + } + + void setExtern(Extern e) + { + att &= 0xFF7C0FFF; + att |= e; + } + + Extern getExtern() + { + Extern e = Extern.D; + switch(bsf(0xF0000000 | att >> 12)) + { + case 0: + e = Extern.C; + break; + case 1: + e = Extern.CPlusPlus; + break; + case 2: + e = Extern.D; + break; + case 3: + e = Extern.Windows; + break; + case 4: + e = Extern.Pascal; + break; + case 5: + e = Extern.System; + break; + default: + break; + } + return e; + } + + void setStatic() { att |= Static; } + bool getStatic() { return att & Static ? true : false; } + void setFinal() { att |= Final; } + bool getFinal() { return att & Final ? true : false; } + void setConst() { att |= Const; } + bool getConst() { return att & Const ? 
true : false; } + void setAbstract() { att |= Abstract; } + bool getAbstract() { return att & Abstract ? true : false; } + void setOverride() { att |= Override; } + bool getOverride() { return att & Override ? true : false; } + void setDeprecated(){ att |= Deprecated; } + bool getDeprecated(){ return att & Deprecated ? true : false; } + void setAuto() { att |= Auto; } + bool getAuto() { return att & Auto ? true : false; } + + char[] toString() + { + return Integer.toString(att); + } + +private: + uint att; + + + static const uint Static = 1<<5; + static const uint Final = 1<<6; + static const uint Const = 1<<7; + static const uint Abstract = 1<<8; + static const uint Override = 1<<9; + static const uint Deprecated = 1<<10; + static const uint Auto = 1<<11; +} diff -r d3c148ca429b -r e0551773a005 src/basic/LiteralParsing.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/basic/LiteralParsing.d Tue Aug 12 18:19:34 2008 +0200 @@ -0,0 +1,900 @@ +module basic.LiteralParsing; + +import basic.SourceLocation, + basic.Message, + basic.conv; + +import tango.io.Stdout, + tango.core.BitManip, + Integer = tango.text.convert.Integer, + Utf = tango.text.convert.Utf, + tango.text.Util; + +enum StringType +{ + Char, + WChar, + DChar +} + +enum NumberType +{ + Int, + UInt, + Long, + ULong, + Float, + Double, + Real +} + +struct String +{ + StringType type; + ubyte[] data; +} + +struct Number +{ + NumberType type; + ulong integer; + real floating; +} + +private struct EscapeReturn +{ + ubyte[] data; + int length; +} + +private struct NumberReturn +{ + char[] data; + int length; +} + +Number parseNumber(char[] str, SourceLocation loc, MessageHandler messages) +{ + Number num; + + switch(str[0]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if(str.contains('.') || str.contains('e') || str.contains('E')) + { + auto n = parseRealNumber(str, loc, messages); + + try + { + num.floating = toReal(n.data); + 
num.type = NumberType.Double; + } + catch(Exception e) + { + num.floating = real.init; + messages.report(FloatingToLarge, loc, loc + n.length - 1); + } + + if(num.floating > double.max) + num.type = NumberType.Real; + } + else + { + auto n = parseDecimalDigits(str, loc, messages); + + try + { + num.integer = toUlong(n.data); + } + catch(Exception e) + { + num.integer = 0; + messages.report(IntegerToLarge, loc, loc + n.length - 1); + } + + if(num.integer > uint.max) + num.type = NumberType.Long; + if(num.integer > long.max) + num.type = NumberType.ULong; + } + break; + default: + messages.report(InvalidStartInteger, loc, loc+1); + } + +// printNumber(str, num); + return num; +} + +NumberReturn parseDecimalDigits(char[] str, SourceLocation loc, MessageHandler messages) +{ + int i = 0; + + char[] number; + + bool end; + while(!end) + { + switch(str[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + number ~= str[i]; + break; + case '_': + break; + default: + end = true; + } + i++; + if(str.length == i) + { + end = true; + i++; + } + } + + NumberReturn res; + res.length = i - 1; + res.data = number; + + return res; +} + +NumberReturn parseRealNumber(char[] str, SourceLocation loc, MessageHandler messages) +{ + int i = 0; + + bool dot, e; + char[] number; + + NumberReturn num; + + bool end; + while(!end) + { + switch(str[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + auto n = parseDecimalDigits(str[i..$], loc, messages); + number ~= n.data; + i += n.length; + break; + case '.': + if(e) + messages.report(FloatingDotInE, loc + i, loc + i + 1); + else if(dot) + messages.report(OnlyOneDotFloating, loc + i, loc + i + 1); + else + { + dot = true; + number ~= str[i]; + } + i++; + break; + case 'e': + case 'E': + if(e) + messages.report(OnlyOneEFloating, loc + i, loc + i + 1); + else + { + e = true; + 
number ~= str[i]; + } + i++; + break; + case '+': + case '-': + if (number[$-1] != 'e' && + number[$-1] != 'E') + messages.report(FloatingBadLocation, loc + i, loc + i + 1) + .arg(str[i]); + else + number ~= str[i]; + i++; + break; + default: + end = true; + } + if(str.length == i) + end = true; + } + + if (number[$-1] == '+' || + number[$-1] == '-' || + number[$-1] == 'e' || + number[$-1] == 'E') + { + messages.report(FloatingInvalidEnd, loc + i - 1, loc + i); + return num; + } + + num.data = number; + num.length = i; + + return num; +} + + +void printNumber(char[] str, Number num) +{ + Stdout(str)(" have become").newline; + switch(num.type) + { + case NumberType.Int: + Stdout(num.integer)(" of type ")("int"); + break; + case NumberType.UInt: + Stdout(num.integer)(" of type ")("uint"); + break; + case NumberType.Long: + Stdout(num.integer)(" of type ")("long"); + break; + case NumberType.ULong: + Stdout(num.integer)(" of type ")("ulong"); + break; + case NumberType.Float: + Stdout(num.floating)(" of type ")("float"); + break; + case NumberType.Double: + Stdout(num.floating)(" of type ")("double"); + break; + case NumberType.Real: + Stdout(num.floating)(" of type ")("real"); + break; + } + Stdout().newline; +} + + +String parseString(char[] str, SourceLocation loc, MessageHandler messages) +{ + String strBuf; + strBuf.data.length = str.length; + strBuf.data.length = 0; + + switch(str[0]) + { + case 'r': + strBuf = parseWysiwygString(str[1..$], strBuf); + break; + case '`': + strBuf = parseWysiwygString(str, strBuf); + break; + case '"': + strBuf = parseDoubleQuotedString(str, strBuf, loc, messages); + break; + case 'x': + strBuf = parseHexString(str[1..$], strBuf, loc + 1, messages); + break; + default: + messages.report(InvalidStrPrefix, loc, loc + 1); + + } + +// printString(str, strBuf); + + return strBuf; +} + +String parseHexString(char[] str, String strBuf, + SourceLocation loc, MessageHandler messages) +{ + int i = 1; // first char is " + char[] hex = 
"0123456789abcdefABCDEF"; + char[] whitespace = "\r\n "; + char[] hexBuf; + + while(str[i] != '"') + { + if(hex.contains(str[i])) + { + hexBuf ~= str[i]; + if(hexBuf.length == 2) + { + strBuf.data ~= Integer.toInt(hexBuf, 16); + hexBuf.length = 0; + } + } + else if(!whitespace.contains(str[i])) + messages.report(InvalidHexStrChar, loc + i, loc + i + 1); + + i++; + } + + + + return strBuf; +} +// + +String parseDoubleQuotedString(char[] str, String strBuf, + SourceLocation loc, MessageHandler messages) +{ + int i = 1; // first char is " + + while(str[i] != '"') + { + switch(str[i]) + { + case '\\': // EscapeSequence + EscapeReturn res = parseEscapeSequence(str[i..$], loc + i, messages); + strBuf.data ~= res.data; + i += res.length; + break; + default: + strBuf.data ~= str[i]; + i++; + } + if(i >= str.length) + break; + } + + if(str.length > i + 1) // Then we have a postfix. Lexer makes sure this is c, w or d. + switch(str[i+1]) + { + case 'c': + break; + case 'w': + strBuf.data = cast(ubyte[])Utf.toString16(cast(char[])strBuf.data); + strBuf.type = StringType.WChar; + break; + case 'd': + strBuf.data = cast(ubyte[])Utf.toString32(cast(char[])strBuf.data); + strBuf.type = StringType.DChar; + break; + } + + + return strBuf; +} + +EscapeReturn parseEscapeSequence(char[] str, + SourceLocation loc, MessageHandler messages) +{ + EscapeReturn res; + + switch(str[1]) + { + case '\'': + res.length = 2; + res.data ~= '\''; + break; + case '"': + res.length = 2; + res.data ~= '\"'; + break; + case '?': + res.length = 2; + res.data ~= '\?'; + break; + case '\\': + res.length = 2; + res.data ~= '\\'; + break; + case 'a': + res.length = 2; + res.data ~= '\a'; + break; + case 'b': + res.length = 2; + res.data ~= '\b'; + break; + case 'f': + res.length = 2; + res.data ~= '\f'; + break; + case 'n': + res.length = 2; + res.data ~= '\n'; + break; + case 'r': + res.length = 2; + res.data ~= '\r'; + break; + case 't': + res.length = 2; + res.data ~= '\t'; + break; + case 'v': + 
res.length = 2; + res.data ~= '\v'; + break; + case 'x': + char[] hex = "0123456789abcdefABCDEF"; + char[] hexBuf; + if(str.length - 1 >= 4) + { + for(int i = 2; i < 4; i++) + if(hex.contains(str[i])) + hexBuf ~= str[i]; + else + messages.report(StringHexInvalid, loc + i, loc + i + 1) + .arg(Integer.toString(i-1)) + .arg(Integer.toString(2)); + res.length = 4; + } + else + { + messages.report(StringShortEscape, loc, loc + str.length); + res.length = str.length - 1; + } + res.data ~= cast(ubyte)Integer.toInt(hexBuf, 16); + break; + case 'u': + char[] hex = "0123456789abcdefABCDEF"; + char[] hexBuf; + if(str.length - 1 >= 6) + { + for(int i = 2; i < 6; i++) + if(hex.contains(str[i])) + hexBuf ~= str[i]; + else + messages.report(StringHexInvalid, loc + i, loc + i + 1) + .arg(Integer.toString(i-1)) + .arg(Integer.toString(6)); + res.length = 6; + } + else + { + messages.report(StringShortEscape, loc, loc + str.length); + res.length = str.length - 1; + } + uint i = Integer.toLong(hexBuf, 16); + if(!isValidUtf8(i)) + messages.report(InvalidUtf8Hex, loc, loc+6); + else + res.data ~= parseToUtf8(i); + break; + case 'U': + char[] hex = "0123456789abcdefABCDEF"; + char[] hexBuf; + if(str.length - 1 >= 10) + { + for(int i = 2; i < 10; i++) + if(hex.contains(str[i])) + hexBuf ~= str[i]; + else + messages.report(StringHexInvalid, loc + i, loc + i + 1) + .arg(Integer.toString(i-1)) + .arg(Integer.toString(10)); + res.length = 10; + } + else + { + messages.report(StringShortEscape, loc, loc + str.length); + res.length = str.length - 1; + } + uint i = Integer.toLong(hexBuf, 16); + if(!isValidUtf8(i)) + messages.report(InvalidUtf8Hex, loc, loc+10); + else + res.data ~= parseToUtf8(i); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + char[] oct = "01234567"; + char[] octBuf; + octBuf ~= str[1]; + res.length = 2; + for(int i = 2; i < 4; i++) + if(oct.contains(str[i])) + { + octBuf ~= str[i]; + res.length += 1; + } + else + 
break; + + uint i = Integer.toLong(octBuf, 8); + res.data ~= i; + break; + case '&': + int i = 2; + char[] s; + while(str[i] != ';') + { + if(str[i] == '"') + { + messages.report(NoCharEntityEnd, loc+i, loc+i+1); + res.length = 2; + break; + } + s ~= str[i]; + i++; + } + + if ( s in characterEntities ) + { + res.data ~= parseToUtf8(characterEntities[s]); + } + else + messages.report(InvalidCharEntity, loc + 2, loc + i); + + res.length = i + 1; // remember the ; + + break; + default: + messages.report(InvalidStrEscape, loc, loc + 2); + res.length += 2; + } + + return res; +} + +String parseWysiwygString(char[] str, String strBuf) +{ + char start = str[0]; + + int i = 1; + + while(str[i] != start) + { + strBuf.data ~= cast(ubyte)str[i]; + i++; + } + return strBuf; +} + +ubyte[] parseToUtf8(uint i) +{ + if(i <= 0x00007F) + return [cast(ubyte)i]; + else if(i <= 0x0007FF) + { + ubyte a = (i << 26) >> 26; + bts(cast(uint*)&a, 7); + ubyte b = (i << 19) >> 25; + bts(cast(uint*)&b, 7); + bts(cast(uint*)&b, 6); + return [b,a]; + } + else if(i <= 0x00FFFF) + { + ubyte a = (i << 26) >> 26; + bts(cast(uint*)&a, 7); + ubyte b = (i << 20) >> 26; + bts(cast(uint*)&b, 7); + ubyte c = (i << 16) >> 28; + bts(cast(uint*)&c, 7); + bts(cast(uint*)&c, 6); + bts(cast(uint*)&c, 5); + return [c,b,a]; + } + else if(i <= 0x10FFFF) + { + ubyte a = (i << 26) >> 26; + bts(cast(uint*)&a, 7); + ubyte b = (i << 20) >> 26; + bts(cast(uint*)&b, 7); + ubyte c = (i << 14) >> 26; + bts(cast(uint*)&c, 7); + ubyte d = (i << 11) >> 29; + bts(cast(uint*)&d, 7); + bts(cast(uint*)&d, 6); + bts(cast(uint*)&d, 5); + bts(cast(uint*)&d, 4); + return [d,c,b,a]; + } +} + +bool isValidUtf8(uint i) +{ + if(i <= 0x10FFFF) + return true; + return false; +} + +void printString(char[] str, String strBuf) +{ + char[] s; + switch(strBuf.type) + { + case StringType.Char: + Stdout(str)(" have become").newline() + (cast(char[])strBuf.data).newline; + break; + case StringType.WChar: + Stdout(str)(" have become").newline() + 
(cast(wchar[])strBuf.data).newline; + break; + case StringType.DChar: + Stdout(str)(" have become").newline() + (cast(dchar[])strBuf.data).newline; + break; + } +} + +static ushort[char[]] characterEntities; + +static this() +{ + characterEntities = + [ + "quot"[]: 34, + "amp": 38, + "lt": 60, + "gt": 62, + "OElig": 338, + "oelig": 339, + "Scaron": 352, + "scaron": 353, + "Yuml": 376, + "circ": 710, + "tilde": 732, + "ensp": 8194, + "emsp": 8195, + "thinsp": 8201, + "zwnj": 8204, + "zwj": 8205, + "lrm": 8206, + "rlm": 8207, + "ndash": 8211, + "mdash": 8212, + "lsquo": 8216, + "rsquo": 8217, + "sbquo": 8218, + "ldquo": 8220, + "rdquo": 8221, + "bdquo": 8222, + "dagger": 8224, + "Dagger": 8225, + "permil": 8240, + "lsaquo": 8249, + "rsaquo": 8250, + "euro": 8364, + "nbsp": 160, + "iexcl": 161, + "cent": 162, + "pound": 163, + "curren": 164, + "yen": 165, + "brvbar": 166, + "sect": 167, + "uml": 168, + "copy": 169, + "ordf": 170, + "laquo": 171, + "not": 172, + "shy": 173, + "reg": 174, + "macr": 175, + "deg": 176, + "plusmn": 177, + "sup2": 178, + "sup3": 179, + "acute": 180, + "micro": 181, + "para": 182, + "middot": 183, + "cedil": 184, + "sup1": 185, + "ordm": 186, + "raquo": 187, + "frac14": 188, + "frac12": 189, + "frac34": 190, + "iquest": 191, + "Agrave": 192, + "Aacute": 193, + "Acirc": 194, + "Atilde": 195, + "Auml": 196, + "Aring": 197, + "AElig": 198, + "Ccedil": 199, + "Egrave": 200, + "Eacute": 201, + "Ecirc": 202, + "Euml": 203, + "Igrave": 204, + "Iacute": 205, + "Icirc": 206, + "Iuml": 207, + "ETH": 208, + "Ntilde": 209, + "Ograve": 210, + "Oacute": 211, + "Ocirc": 212, + "Otilde": 213, + "Ouml": 214, + "times": 215, + "Oslash": 216, + "Ugrave": 217, + "Uacute": 218, + "Ucirc": 219, + "Uuml": 220, + "Yacute": 221, + "THORN": 222, + "szlig": 223, + "agrave": 224, + "aacute": 225, + "acirc": 226, + "atilde": 227, + "auml": 228, + "aring": 229, + "aelig": 230, + "ccedil": 231, + "egrave": 232, + "eacute": 233, + "ecirc": 234, + "euml": 235, + "igrave": 
236, + "iacute": 237, + "icirc": 238, + "iuml": 239, + "eth": 240, + "ntilde": 241, + "ograve": 242, + "oacute": 243, + "ocirc": 244, + "otilde": 245, + "ouml": 246, + "divide": 247, + "oslash": 248, + "ugrave": 249, + "uacute": 250, + "ucirc": 251, + "uuml": 252, + "yacute": 253, + "thorn": 254, + "yuml": 255, + "fnof": 402, + "Alpha": 913, + "Beta": 914, + "Gamma": 915, + "Delta": 916, + "Epsilon": 917, + "Zeta": 918, + "Eta": 919, + "Theta": 920, + "Iota": 921, + "Kappa": 922, + "Lambda": 923, + "Mu": 924, + "Nu": 925, + "Xi": 926, + "Omicron": 927, + "Pi": 928, + "Rho": 929, + "Sigma": 931, + "Tau": 932, + "Upsilon": 933, + "Phi": 934, + "Chi": 935, + "Psi": 936, + "Omega": 937, + "alpha": 945, + "beta": 946, + "gamma": 947, + "delta": 948, + "epsilon": 949, + "zeta": 950, + "eta": 951, + "theta": 952, + "iota": 953, + "kappa": 954, + "lambda": 955, + "mu": 956, + "nu": 957, + "xi": 958, + "omicron": 959, + "pi": 960, + "rho": 961, + "sigmaf": 962, + "sigma": 963, + "tau": 964, + "upsilon": 965, + "phi": 966, + "chi": 967, + "psi": 968, + "omega": 969, + "thetasym": 977, + "upsih": 978, + "piv": 982, + "bull": 8226, + "hellip": 8230, + "prime": 8242, + "Prime": 8243, + "oline": 8254, + "frasl": 8260, + "weierp": 8472, + "image": 8465, + "real": 8476, + "trade": 8482, + "alefsym": 8501, + "larr": 8592, + "uarr": 8593, + "rarr": 8594, + "darr": 8595, + "harr": 8596, + "crarr": 8629, + "lArr": 8656, + "uArr": 8657, + "rArr": 8658, + "dArr": 8659, + "hArr": 8660, + "forall": 8704, + "part": 8706, + "exist": 8707, + "empty": 8709, + "nabla": 8711, + "isin": 8712, + "notin": 8713, + "ni": 8715, + "prod": 8719, + "sum": 8721, + "minus": 8722, + "lowast": 8727, + "radic": 8730, + "prop": 8733, + "infin": 8734, + "ang": 8736, + "and": 8743, + "or": 8744, + "cap": 8745, + "cup": 8746, + "int": 8747, + "there4": 8756, + "sim": 8764, + "cong": 8773, + "asymp": 8776, + "ne": 8800, + "equiv": 8801, + "le": 8804, + "ge": 8805, + "sub": 8834, + "sup": 8835, + "nsub": 8836, + 
"sube": 8838, + "supe": 8839, + "oplus": 8853, + "otimes": 8855, + "perp": 8869, + "sdot": 8901, + "lceil": 8968, + "rceil": 8969, + "lfloor": 8970, + "rfloor": 8971, + "lang": 9001, + "rang": 9002, + "loz": 9674, + "spades": 9824, + "clubs": 9827, + "hearts": 9829, + "diams": 9830 + ]; +} diff -r d3c148ca429b -r e0551773a005 src/basic/Message.d --- a/src/basic/Message.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/basic/Message.d Tue Aug 12 18:19:34 2008 +0200 @@ -10,7 +10,8 @@ import llvm.type; import lexer.Token, - lexer.Lexer; + lexer.Lexer, + sema.DType; import basic.SourceLocation, basic.SourceManager; @@ -41,6 +42,20 @@ return m; } + Message report(uint opcode, SourceRange[] ranges, SourceLocation[] locs) + { + Message m = new Message(opcode, ranges, locs, src_mgr, this); + messages ~= m; + return m; + } + + Message report(uint opcode, SLoc location1, SLoc location2, SLoc location3 = SLoc.Invalid) + { + Message m = new Message(opcode, [SourceRange(location1, location2)][], [location3][], src_mgr, this); + messages ~= m; + return m; + } + void checkErrors(ExitLevel exitlevel = ExitLevel.Normal) { if(messages.length == 0) @@ -83,20 +98,50 @@ this(int opcode, SLoc location, SourceManager src_mgr, MessageHandler msg_handler) { this.src_mgr = src_mgr; - this.location = location; + this.interests ~= location; args ~= Messages[opcode].message; this.type = Messages[opcode].type; this.msg_handler = msg_handler; } + this(int opcode, SourceRange[] locs, SLoc[] interests, + SourceManager src_mgr, MessageHandler msg_handler) + in + { + assert(locs.length + interests.length, "Atleast one location is requiret for a mark"); + } + body + { + this.src_mgr = src_mgr; + this.locs = locs; + this.interests = interests; + args ~= Messages[opcode].message; + this.type = Messages[opcode].type; + this.msg_handler = msg_handler; + haveEnd = true; + } + char[] toString() { char[256] tmp = void; char[] msg = layout(tmp, args); - Lexer l = new Lexer(location, src_mgr, new 
MessageHandler(src_mgr)); + SLoc location; + if (interests.length) + location = interests[0]; + else + location = locs[0].begin; - Token t = l.next; + int len = 0; + if(!haveEnd) + { + Lexer l = new Lexer(interests[0], src_mgr, new MessageHandler(src_mgr)); + + Token t = l.next; + len = t.length; + } +// else + // len = end - location; if (src_mgr.getRawData(location).length > 0) msg = src_mgr.getLocationAsString(location) ~ ": " ~ msg; @@ -107,8 +152,18 @@ char[] line = src_mgr.getLine(location); char[] marks = line.dup; marks[] = ' '; - size_t p = src_mgr.getColumn(location); - marks[p .. p + t.length] = '^'; + + foreach (s ; locs) + { + size_t p = src_mgr.getColumn(s.begin); + marks[p .. p + (s.end-s.begin)] = interests.length ? '~' : '^'; + } + + foreach (interest ; interests) + { + size_t i = src_mgr.getColumn(interest); + marks[i] = '^'; + } msg ~= "\n "; msg ~= line; @@ -120,7 +175,7 @@ Message arg(char[] s) { - if (args.length == 11) + if (args.length > 10) throw new Exception("Sorry, errors only support up to 10 args"); args ~= s; return this; @@ -140,6 +195,14 @@ return arg([c]); } + Message arg(DType[] types) + { + char[][] res; + foreach (type; types) + res ~= type.name(); + return arg(res); + } + Message fatal(ExitLevel exitlevel = ExitLevel.Normal) { msg_handler.checkErrors(exitlevel); @@ -157,7 +220,10 @@ MessageType type; private: char[][] args; - SLoc location; + SourceRange[] locs; + SLoc[] interests; + bool haveEnd; SourceManager src_mgr; MessageHandler msg_handler; + Token t; } diff -r d3c148ca429b -r e0551773a005 src/basic/Messages.d --- a/src/basic/Messages.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/basic/Messages.d Tue Aug 12 18:19:34 2008 +0200 @@ -21,13 +21,51 @@ ExpectedCastType, InvalidDeclType, InvalidType, + UnexpectedLinkType, // - imports/module ExpectedIdAfterPackage, RenameMustBeSingleIdent, + UnexpectedEOF, - // Imports + // sema CannotFindModule, + InvalidImplicitCast, + UndefinedIdentifier, + UndefinedType, + MissingMember, + 
CannotRedeclare, + // - switch + MultipleDefaults, + OverlappingCases, + InvalidCaseValue, + NoConstructor, + NoMachingCon, + CandidateNr, + NoMethodByName, + NoMachingMethod, + CannotReassignSArray, + CanOnlyDerefPointers, + CannotCallMethod, + + // Strings + InvalidStrPrefix, + InvalidStrEscape, + InvalidUtf8Hex, + InvalidHexStrChar, + InvalidCharEntity, + NoCharEntityEnd, + StringShortEscape, + StringHexInvalid, + InvalidStartInteger, + IntegerToLarge, + FloatingToLarge, + FloatingInvalidEnd, + FloatingBadLocation, + FloatingDotInE, + + // Protection + CannotAccessPrivate, } enum MessageType @@ -50,11 +88,13 @@ static this() { Messages = [ + // lexing UnexpectedEOFBlock : E(Err, "Unexpected end of file. Unclosed comment block"), InvalidSymbol : E(Err, "Read invalid symbol: '%0'"), OnlyOneDotFloating : E(Err, "Only one '.' is allowed in an floating number"), OnlyOneEFloating : E(Err, "Only one E is allowed in an floating number"), + // parsing UnexpectedTokMulti : E(Err, "Unexpected token, got %0 expected one of %1"), UnexpectedTokSingle : E(Err, "Unexpected token, got %0 expected %1"), UnexpectedTok : E(Err, "Unexpected token %0"), @@ -67,8 +107,48 @@ InvalidDeclType : E(Err, "Invalid declaration type"), InvalidType : E(Err, "Invalid type"), ExpectedIdAfterPackage : E(Err, "Identifier expected following package"), + UnexpectedLinkType : E(Err, "Invalid linkage type. 
Only C, C++, D, Windows, Pascal and System is allowed"), + UnexpectedEOF : E(Err, "Unexpected EOF after '%0'"), - CannotFindModule : E(Err, "Cannot find module '%0'") + // sema + CannotFindModule : E(Err, "Cannot find module '%0'"), + InvalidImplicitCast : E(Err, "Cannot make implicit cast between '%0' and '%1'"), + UndefinedIdentifier : E(Err, "Undefined identifier '%0'"), + UndefinedType : E(Err, "Undefined type '%0'"), + MissingMember : E(Err, "%0 %1 has no member %2"), + CannotRedeclare : E(Err, "Cannot redeclare '%0'"), + NoConstructor : E(Err, "No constructor avaible"), + NoMachingCon : E(Err, "No maching constructor. Candidates are:"), + CandidateNr : E(Err, "Candidate number %0"), + NoMethodByName : E(Err, "No method with that name"), + NoMachingMethod : E(Err, "No maching method. Candidates are:"), + // - switch + MultipleDefaults + : E(Err, "Switch statements can't have multiple defaults"), + OverlappingCases + : E(Err, "Can't have multiple cases with the same value." + " Values appearing in multiple cases: %0"), + InvalidCaseValue : E(Err, "Case values must be integers"), + CannotReassignSArray: E(Err, "Cannot reassign static arrays"), + CanOnlyDerefPointers: E(Err, "Can only deref pointers, not '%0'"), + CannotCallMethod : E(Err, "Cannot call a method of type '%0' with '%1'"), + + // literals + InvalidStrPrefix : E(Err, "Invalid string literal prefix"), + InvalidStrEscape : E(Err, "Invalid escape sequence"), + InvalidUtf8Hex : E(Err, "Invalid Utf8 hex char"), + NoCharEntityEnd : E(Err, "Character entity have no end, insert ';'"), + InvalidCharEntity : E(Err, "Invalid character entity"), + InvalidHexStrChar : E(Err, "Invalid character in hex string"), + StringShortEscape : E(Err, "String literal is to short for escape sequence"), + StringHexInvalid : E(Err, "Hex escape sequence have invalid digit at position %0 of %1"), + InvalidStartInteger : E(Err, "Invalid begining of number"), + IntegerToLarge : E(Err, "Integer is to large. 
Max size is 18446744073709551615"), + FloatingToLarge : E(Err, "Floating literal is to large"), + FloatingInvalidEnd : E(Err, "Floating literal have wrong ending"), + FloatingBadLocation : E(Err, "Bad location for '%0' in floting literal"), + FloatingDotInE : E(Err, "There cannot be a dot in the exponent of a floating literal"), + CannotAccessPrivate : E(Err, "Cannot access private member.") ]; } diff -r d3c148ca429b -r e0551773a005 src/basic/SmallArray.d --- a/src/basic/SmallArray.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/basic/SmallArray.d Tue Aug 12 18:19:34 2008 +0200 @@ -84,6 +84,19 @@ else ptr = array.ptr; } + alias opCatAssign push; + + T pop() + { + assert(len > 0, "Can't remove from an empty array"); + return ptr[--len]; + } + + T peek() + { + assert(len > 0, "Array is empty"); + return ptr[len - 1]; + } size_t length() { return len; } diff -r d3c148ca429b -r e0551773a005 src/basic/SourceLocation.d --- a/src/basic/SourceLocation.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/basic/SourceLocation.d Tue Aug 12 18:19:34 2008 +0200 @@ -1,5 +1,7 @@ module basic.SourceLocation; +import Integer = tango.text.convert.Integer; + /// Shorter alias for SourceLocation public alias SourceLocation SLoc; @@ -62,6 +64,12 @@ return res; } + /// Get the length between two location + int opSub(SourceLocation loc) + { + return val - loc.val; + } + /// Creates a SourceLocation from a File ID static SourceLocation fromFileID(uint fileID) { @@ -71,6 +79,10 @@ return res; } + char[] toString() + { + return Integer.toString(val); + } /** Used for invalid/unknown locations. (also the default value, but this is more explicit) diff -r d3c148ca429b -r e0551773a005 src/basic/conv.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/basic/conv.d Tue Aug 12 18:19:34 2008 +0200 @@ -0,0 +1,464 @@ + +// Written in the D programming language. + +/* + * Copyright (C) 2002-2006 by Digital Mars, www.digitalmars.com + * Written by Walter Bright + * Some parts contributed by David L. 
Davis + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * o The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * o Altered source versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * o This notice may not be removed or altered from any source + * distribution. + */ + +/*********** + * Conversion building blocks. These differ from the C equivalents + * atoi() and atol() by + * checking for overflow and not allowing whitespace. + * + * For conversion to signed types, the grammar recognized is: + *
+$(I Integer):
+    $(I Sign UnsignedInteger)
+    $(I UnsignedInteger)
+
+$(I Sign):
+    $(B +)
+    $(B -)
+ * 
+ * For conversion to unsigned types, the grammar recognized is: + *
+$(I UnsignedInteger):
+    $(I DecimalDigit)
+    $(I DecimalDigit) $(I UnsignedInteger)
+ * 
+ * Macros: + * WIKI=Phobos/StdConv + */ + + +/************************* + Changed to be used next to tango. + + Unittests removed. + *************************/ + +module basic.conv; + +private import tango.text.Util; // for atof(), toString() +private import tango.stdc.stringz; +private import tango.stdc.ctype; +private import tango.stdc.stdlib; +private import tango.stdc.errno; +//private import std.c.stdlib; +//private import std.math; // for fabs(), isnan() + +private +{ + extern (C) int getErrno(); + extern (C) int setErrno(int); +} + +//debug=conv; // uncomment to turn on debugging printf's + +/* ************* Exceptions *************** */ + +/** + * Thrown on conversion errors, which happens on deviation from the grammar. + */ +class ConvError : Exception +{ + this(char[] s) + { + super("conversion " ~ s); + } +} + +private void conv_error(char[] s) +{ + throw new ConvError(s); +} + +/** + * Thrown on conversion overflow errors. + */ +class ConvOverflowError : Exception +{ + this(char[] s) + { + super("Error: overflow " ~ s); + } +} + +private void conv_overflow(char[] s) +{ + throw new ConvOverflowError(s); +} + +/*************************************************************** + * Convert character string to the return type. 
+ */ + +int toInt(char[] s) +{ + int length = s.length; + + if (!length) + goto Lerr; + + int sign = 0; + int v = 0; + + for (int i = 0; i < length; i++) + { + char c = s[i]; + if (c >= '0' && c <= '9') + { + if (v < int.max/10 || (v == int.max/10 && c + sign <= '7')) + v = v * 10 + (c - '0'); + else + goto Loverflow; + } + else if (c == '-' && i == 0) + { + sign = -1; + if (length == 1) + goto Lerr; + } + else if (c == '+' && i == 0) + { + if (length == 1) + goto Lerr; + } + else + goto Lerr; + } + if (sign == -1) + { + if (cast(uint)v > 0x80000000) + goto Loverflow; + v = -v; + } + else + { + if (cast(uint)v > 0x7FFFFFFF) + goto Loverflow; + } + return v; + +Loverflow: + conv_overflow(s); + +Lerr: + conv_error(s); + return 0; +} + +/******************************************************* + * ditto + */ + +uint toUint(char[] s) +{ + int length = s.length; + + if (!length) + goto Lerr; + + uint v = 0; + + for (int i = 0; i < length; i++) + { + char c = s[i]; + if (c >= '0' && c <= '9') + { + if (v < uint.max/10 || (v == uint.max/10 && c <= '5')) + v = v * 10 + (c - '0'); + else + goto Loverflow; + } + else + goto Lerr; + } + return v; + +Loverflow: + conv_overflow(s); + +Lerr: + conv_error(s); + return 0; +} + +/******************************************************* + * ditto + */ + +long toLong(char[] s) +{ + int length = s.length; + + if (!length) + goto Lerr; + + int sign = 0; + long v = 0; + + for (int i = 0; i < length; i++) + { + char c = s[i]; + if (c >= '0' && c <= '9') + { + if (v < long.max/10 || (v == long.max/10 && c + sign <= '7')) + v = v * 10 + (c - '0'); + else + goto Loverflow; + } + else if (c == '-' && i == 0) + { + sign = -1; + if (length == 1) + goto Lerr; + } + else if (c == '+' && i == 0) + { + if (length == 1) + goto Lerr; + } + else + goto Lerr; + } + if (sign == -1) + { + if (cast(ulong)v > 0x8000000000000000) + goto Loverflow; + v = -v; + } + else + { + if (cast(ulong)v > 0x7FFFFFFFFFFFFFFF) + goto Loverflow; + } + return v; + 
+Loverflow: + conv_overflow(s); + +Lerr: + conv_error(s); + return 0; +} + +/******************************************************* + * ditto + */ + +ulong toUlong(char[] s) +{ + int length = s.length; + + if (!length) + goto Lerr; + + ulong v = 0; + + for (int i = 0; i < length; i++) + { + char c = s[i]; + if (c >= '0' && c <= '9') + { + if (v < ulong.max/10 || (v == ulong.max/10 && c <= '5')) + v = v * 10 + (c - '0'); + else + goto Loverflow; + } + else + goto Lerr; + } + return v; + +Loverflow: + conv_overflow(s); + +Lerr: + conv_error(s); + return 0; +} + +/******************************************************* + * ditto + */ + +short toShort(char[] s) +{ + int v = toInt(s); + + if (v != cast(short)v) + goto Loverflow; + + return cast(short)v; + +Loverflow: + conv_overflow(s); + return 0; +} + +/******************************************************* + * ditto + */ + +ushort toUshort(char[] s) +{ + uint v = toUint(s); + + if (v != cast(ushort)v) + goto Loverflow; + + return cast(ushort)v; + +Loverflow: + conv_overflow(s); + return 0; +} + +/******************************************************* + * ditto + */ + +byte toByte(char[] s) +{ + int v = toInt(s); + + if (v != cast(byte)v) + goto Loverflow; + + return cast(byte)v; + +Loverflow: + conv_overflow(s); + return 0; +} + +/******************************************************* + * ditto + */ + +ubyte toUbyte(char[] s) +{ + uint v = toUint(s); + + if (v != cast(ubyte)v) + goto Loverflow; + + return cast(ubyte)v; + +Loverflow: + conv_overflow(s); + return 0; +} + +/******************************************************* + * ditto + */ + +float toFloat(in char[] s) +{ + float f; + char* endptr; + char* sz; + + //writefln("toFloat('%s')", s); + sz = toStringz(s); + if (tango.stdc.ctype.isspace(*sz)) + goto Lerr; + + // BUG: should set __locale_decpoint to "." 
for DMC + + setErrno(0); + f = strtof(sz, &endptr); + if (getErrno() == ERANGE) + goto Lerr; + if (endptr && (endptr == sz || *endptr != 0)) + goto Lerr; + + return f; + + Lerr: + conv_error(s ~ " not representable as a float"); + assert(0); +} + +/******************************************************* + * ditto + */ + +double toDouble(in char[] s) +{ + double f; + char* endptr; + char* sz; + + //writefln("toDouble('%s')", s); + sz = toStringz(s); + if (tango.stdc.ctype.isspace(*sz)) + goto Lerr; + + // BUG: should set __locale_decpoint to "." for DMC + + setErrno(0); + f = strtod(sz, &endptr); + if (getErrno() == ERANGE) + goto Lerr; + if (endptr && (endptr == sz || *endptr != 0)) + goto Lerr; + + return f; + + Lerr: + conv_error(s ~ " not representable as a double"); + assert(0); +} + +/******************************************************* + * ditto + */ +real toReal(in char[] s) +{ + real f; + char* endptr; + char* sz; + + //writefln("toReal('%s')", s); + sz = toStringz(s); + if (tango.stdc.ctype.isspace(*sz)) + goto Lerr; + + // BUG: should set __locale_decpoint to "." 
for DMC + + setErrno(0); + f = strtold(sz, &endptr); + if (getErrno() == ERANGE) + goto Lerr; + if (endptr && (endptr == sz || *endptr != 0)) + goto Lerr; + + return f; + + Lerr: + conv_error(s ~ " not representable as a real"); + assert(0); +} + diff -r d3c148ca429b -r e0551773a005 src/lexer/Keyword.d --- a/src/lexer/Keyword.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/lexer/Keyword.d Tue Aug 12 18:19:34 2008 +0200 @@ -35,19 +35,64 @@ // type related "struct" : Tok.Struct, + "function" : Tok.Function, + "delegate" : Tok.Delegate, + "class" : Tok.Class, + "interface" : Tok.Interface, + "union" : Tok.Union, + "typedef" : Tok.Typedef, + "typeid" : Tok.Typeid, + "typeof" : Tok.Typeof, + "sizeof" : Tok.Sizeof, + "alias" : Tok.Alias, + "this" : Tok.This, + "new" : Tok.New, + "null" : Tok.Null, +// "super" : Tok.Super, // control flow "if" : Tok.If, "else" : Tok.Else, "while" : Tok.While, + "for" : Tok.For, "switch" : Tok.Switch, "case" : Tok.Case, "default" : Tok.Default, + "break" : Tok.Break, "return" : Tok.Return, "cast" : Tok.Cast, // modules "module" : Tok.Module, - "import" : Tok.Import + "import" : Tok.Import, + + // attributes + "public" : Tok.Public, + "private" : Tok.Private, + "protected" : Tok.Protected, + "package" : Tok.Package, + "export" : Tok.Export, + "static" : Tok.Static, + "final" : Tok.Final, + "const" : Tok.Const, + "abstract" : Tok.Abstract, + "override" : Tok.Override, + "deprecated": Tok.Deprecated, + "auto" : Tok.Auto, + "extern" : Tok.Extern, + + // exceptions + "assert" : Tok.Assert, + "throw" : Tok.Throw, + "try" : Tok.Try, + "catch" : Tok.Catch, + "finally" : Tok.Finally, + + // functions + "in" : Tok.In, + "out" : Tok.Out, + "body" : Tok.Body, + + "asm" : Tok.Asm ]; } diff -r d3c148ca429b -r e0551773a005 src/lexer/Lexer.d --- a/src/lexer/Lexer.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/lexer/Lexer.d Tue Aug 12 18:19:34 2008 +0200 @@ -37,7 +37,7 @@ foreach (c; "0123456789") charTable[c] = CharType.Number; - foreach (c; 
"(){}[];:.,=!<>+-*/%\"`") + foreach (c; "(){}[];:.,=!<>+-*/%&\"`") charTable[c] = CharType.Symbol; foreach (c; " \n") @@ -67,8 +67,11 @@ symbolFunctions['*'] = &star; symbolFunctions['/'] = &slash; symbolFunctions['%'] = &percent; + symbolFunctions['&'] = &and; symbolFunctions['"'] = &string; symbolFunctions['`'] = &string; + + last = Token(Tok.EOF, SLoc() + 1, 0); } /** @@ -80,27 +83,34 @@ */ Token next() { + Token res; switch (getNextChar) { case CharType.EOF: - SLoc loc; - return Token(Tok.EOF, loc, 0); + return Token(Tok.EOF, last.location, 0); case CharType.Whitespace: position += 1; - return this.next; + res = this.next; + break; case CharType.Symbol: - return lexSymbol; + res = lexSymbol; + break; case CharType.Letter: - return lexLetter; + res = lexLetter; + break; case CharType.Number: - return lexNumber; + res = lexNumber; + break; case CharType.Other: messages.report(UnexpectedTok, Loc(position)).fatal(ExitLevel.Lexer); } + if (res.type != Tok.EOF) + last = res; + return res; } /** @@ -120,7 +130,9 @@ return t; } + Token last; private: + Token eq() { if(source[position] == '=') @@ -188,30 +200,49 @@ { if(source[position] == '=') return Token(Tok.Le, Loc(position++ - 1), 2); + if(source[position] == '<') + return Token(Tok.LeftShift, Loc(position++ - 1), 2); return Token(Tok.Lt, Loc(position - 1), 1); } Token ge() { if(source[position] == '=') return Token(Tok.Ge, Loc(position++ - 1), 2); + if(source[position] == '>') + if(source[position+1] == '>') + { + position += 2; + return Token(Tok.UnsignedRightShift, Loc(position - 3), 3); /* position is now past all three '>'; the token starts 3 back */ + } + else + return Token(Tok.RightShift, Loc(position++ - 1), 2); return Token(Tok.Gt, Loc(position - 1), 1); } Token plus() { + if(source[position] == '=') + return Token(Tok.PlusAssign, Loc(position++ - 1), 2); return Token(Tok.Plus, Loc(position - 1), 1); } Token minus() { + if(source[position] == '=') + return Token(Tok.MinusAssign, Loc(position++ - 1), 2); return Token(Tok.Minus, 
Loc(position - 1), 1); } Token star() { + 
if(source[position] == '=') + return Token(Tok.StarAssign, Loc(position++ - 1), 2); return Token(Tok.Star, Loc(position - 1), 1); } Token slash() { + int p = position; switch(source[position]) { + case '=': + return Token(Tok.SlashAssign, Loc(position++ - 1), 2); case '/': while(getNextChar != CharType.EOF) { @@ -231,7 +262,7 @@ return this.next; } } - messages.report(UnexpectedEOFBlock,Loc(position)); + messages.report(UnexpectedEOFBlock,Loc(p)).fatal(ExitLevel.Lexer); case '+': position += 2; @@ -256,15 +287,22 @@ if(nesting == 0) return this.next; } - messages.report(UnexpectedEOFBlock,Loc(position)); + messages.report( + UnexpectedEOFBlock, + Loc(p)).fatal(ExitLevel.Lexer); default: return Token(Tok.Slash, Loc(position - 1), 1); } } - + Token and() + { + return Token(Tok.And, Loc(position - 1), 1); + } Token percent() { + if(source[position] == '=') + return Token(Tok.PercentAssign, Loc(position++ - 1), 2); return Token(Tok.Percent, Loc(position - 1), 1); } @@ -289,7 +327,15 @@ { ++position; if (source[position-1] == '"' ) + { + if(getNextChar != CharType.EOF) + if (source[position] == 'c' || + source[position] == 'w' || + source[position] == 'd') + position++; + return Token(Tok.String, Loc(start), position - start); + } else if (source[position-1] == '\\') position++; } @@ -310,12 +356,11 @@ Token lexNumber () { - bool sign = false; - bool dot = false; - bool e = false; + bool sign; int i = 0; + bool end = false; while(!end) { @@ -326,11 +371,15 @@ case CharType.Symbol: if(this.source[position+i] == '.') { - if(dot) - messages.report(OnlyOneDotFloating, Loc(position + i)); - dot = true; break; } + if (this.source[position+i] == '+' || + this.source[position+i] == '-') + { + if (source[position+i-1] == 'e' || + source[position+i-1] == 'E') + break; + } end = true; continue; case CharType.Letter: @@ -339,9 +388,6 @@ if (this.source[position+i] == 'e' || this.source[position+i] == 'E') { - if (e) - messages.report(OnlyOneEFloating, Loc(position + i)); - e = 
true; break; } end = true; @@ -354,6 +400,13 @@ i++; } + while(source[position+i] == 'u' || + source[position+i] == 'U' || + source[position+i] == 'L') + i += 1; + + + position += i; return Token(Tok.Integer, Loc(position - i), i); diff -r d3c148ca429b -r e0551773a005 src/lexer/Token.d --- a/src/lexer/Token.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/lexer/Token.d Tue Aug 12 18:19:34 2008 +0200 @@ -1,7 +1,8 @@ module lexer.Token; public -import basic.SourceLocation; +import basic.SourceLocation, + basic.SourceManager; import Integer = tango.text.convert.Integer; @@ -34,8 +35,10 @@ /** Get the type of the Token as a string */ - char[] getType () + char[] get (SourceManager sm) { + if (isIdentifier) + return sm.getText(asRange); return typeToString[this.type]; } @@ -44,7 +47,7 @@ */ char[] toString () { - return this.getType()~": Len: "~Integer.toString(this.length); + return typeToString[this.type]; } /// Get the range of this token @@ -64,7 +67,63 @@ */ bool isAssignment() { - return type == Tok.Assign; + return type >= Tok.Assign && type <= Tok.PercentAssign; + } + + /** + Returns true for all attributes( public, static, private...) + */ + bool isAttribute() + { + return type >= Tok.Public && type <= Tok.Extern; + } + + /** + Returns true for the protection keywords accepted before a base class (token types Tok.Public through Tok.Export) + */ + bool isBaseClassProtection() + { + return type >= Tok.Public && type <= Tok.Export; + } + + /** + just a shortcut to avoid `token.type == tok.Switch`. + */ + bool isSwitch() + { + return type == Tok.Switch; + } + + /** + just a shortcut to avoid `token.type == tok.While`. + */ + bool isWhile() + { + return type == Tok.While; + } + + /** + just a shortcut to avoid `token.type == tok.For`. + */ + bool isFor() + { + return type == Tok.For; + } + + /** + just a shortcut to avoid `token.type == tok.If`. + */ + bool isIf() + { + return type == Tok.If; + } + + /** + just a shortcut to avoid `token.type == tok.Return`. 
+ */ + bool isReturn() + { + return type == Tok.Return; } /** @@ -90,12 +149,23 @@ /* Basic types */ Identifier, Integer, + String, /* Basic operators */ Assign, - Plus, Minus, - Star, Slash, Percent, + PlusAssign, + MinusAssign, + StarAssign, + SlashAssign, + PercentAssign, + Plus, + Minus, + Star, + Slash, + Percent, + LeftShift, RightShift, UnsignedRightShift, Comma, + And, /* Symbols */ OpenParentheses, @@ -115,6 +185,7 @@ Not, + /* Keywords */ Byte, Ubyte, Short, Ushort, @@ -129,17 +200,42 @@ Void, - Struct, + Struct, Function, Delegate, Class, This, + Interface, Union, Typedef, Typeid, + Typeof, Sizeof, Alias, If, Else, While, - Switch, Case, Default, - Return, Cast, - - String, + For, + Switch, Case, Default, Break, + Return, Cast, Module, Import, + New, Null, + + /* Attributes */ + Public, Private, Package, Export, Protected, + Static, + Final, + Const, + Abstract, + Override, + Deprecated, + Auto, + Extern, + + Align, + + Asm, + + In, Out, Body, + + Assert, Throw, Try, Catch, Finally, + + + + } /** @@ -154,49 +250,79 @@ typeToString = [ Tok.EOF:"EOF"[], - Tok.Identifier:"Identifier", - Tok.Byte:"Byte", - Tok.Short:"Short", - Tok.Int:"Int", - Tok.Long:"Long", - Tok.Char:"Char", - Tok.Wchar:"Wchar", - Tok.Dchar:"Dchar", - Tok.Bool:"Bool", - Tok.Void:"Void", - Tok.Eq:"Eq", - Tok.Ne:"Ne", - Tok.Lt:"Lt", - Tok.Le:"Le", - Tok.Gt:"Gt", - Tok.Ge:"Ge", - Tok.OpenParentheses:"OpenParentheses", - Tok.CloseParentheses:"CloseParentheses", - Tok.OpenBrace:"OpenBrace", - Tok.CloseBrace:"CloseBrace", - Tok.OpenBracket:"OpenBracket", - Tok.CloseBracket:"CloseBracket", - Tok.Dot:"Dot", - Tok.Assign:"Assign", - Tok.Plus:"Plus", - Tok.Minus:"Minus", - Tok.Star:"Star", - Tok.Slash:"Slash", - Tok.Percent:"Percent", - Tok.Integer:"Integer", - Tok.If:"If", - Tok.While:"While", - Tok.Switch:"Switch", - Tok.Case:"Case", - Tok.Default:"Default", - Tok.Comma:"Comma", - Tok.Return:"Return", - Tok.Struct:"Struct", - Tok.Colon:"Colon", - Tok.Seperator:"Seperator", - Tok.Cast:"Cast", - 
Tok.Module:"Module", - Tok.Import:"Import", - Tok.String:"String" + Tok.Identifier:"identifier", + Tok.Byte:"byte", + Tok.Short:"short", + Tok.Int:"int", + Tok.Long:"long", + Tok.Char:"char", + Tok.Wchar:"wchar", + Tok.Dchar:"dchar", + Tok.Bool:"bool", + Tok.Void:"void", + Tok.Function:"function", + Tok.Eq:"==", + Tok.Ne:"!=", + Tok.Lt:"<", + Tok.Le:"<=", + Tok.Gt:">", + Tok.Ge:">=", + Tok.OpenParentheses:"(", + Tok.CloseParentheses:")", + Tok.OpenBrace:"{", + Tok.CloseBrace:"}", + Tok.OpenBracket:"[", + Tok.CloseBracket:"]", + Tok.Dot:".", + Tok.Assign:"=", + Tok.Plus:"+", + Tok.PlusAssign:"+=", + Tok.Minus:"-", + Tok.MinusAssign:"-=", + Tok.Star:"*", + Tok.StarAssign:"*=", + Tok.Slash:"/", + Tok.SlashAssign:"/=", + Tok.Percent:"%", + Tok.PercentAssign:"%=", + Tok.LeftShift:"<<", + Tok.RightShift:">>", + Tok.UnsignedRightShift:">>>", + Tok.Integer:"int", + Tok.If:"if", + Tok.While:"while", + Tok.For:"for", + Tok.Switch:"switch", + Tok.Case:"case", + Tok.Default:"default", + Tok.Comma:",", + Tok.Return:"return", + Tok.Struct:"struct", + Tok.Class:"class", + Tok.This:"this", + Tok.Colon:":", + Tok.Seperator:";", + Tok.And:"&", + Tok.Cast:"cast", + Tok.Module:"module", + Tok.Import:"import", + Tok.String:"String", + Tok.Public:"public", + Tok.Private:"private", + Tok.Protected:"protected", + Tok.Package:"package", + Tok.Export:"export", + Tok.Static:"static", + Tok.Final:"final", + Tok.Public:"public", + Tok.Const:"const", + Tok.Abstract:"abstract", + Tok.Override:"override", + Tok.Deprecated:"deprecated", + Tok.Auto:"auto", + Tok.Extern:"extern", + Tok.New:"new", + Tok.Null:"null", + Tok.Alias:"alias" ]; } diff -r d3c148ca429b -r e0551773a005 src/parser/Action.d --- a/src/parser/Action.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/parser/Action.d Tue Aug 12 18:19:34 2008 +0200 @@ -2,6 +2,8 @@ import lexer.Token; +import basic.Attribute; + /** Used to indicate what type of operator is used in a given binary expression (and unary expressions?) 
@@ -9,6 +11,11 @@ public enum Operator { Assign, + AddAssign, + SubAssign, + MulAssign, + DivAssign, + ModAssign, Eq, Ne, @@ -17,6 +24,8 @@ Add, Sub, Mul, Div, Mod, + + LeftShift, RightShift, UnsignedRightShift, } @@ -31,11 +40,11 @@ Token tok; } -class PointerId : Id +class PointerTypeId : Id { - public static PointerId opCall(Id id) + public static PointerTypeId opCall(Id id) { - auto p = new PointerId(); + auto p = new PointerTypeId(); p.id = id; return p; } @@ -43,11 +52,11 @@ Id id; } -class ArrayId : Id +class StaticArrayTypeId : Id { - public static ArrayId opCall(Id id, Object number) + public static StaticArrayTypeId opCall(Id id, Object number) { - auto a = new ArrayId(); + auto a = new StaticArrayTypeId(); a.id = id; a.number = number; return a; @@ -57,6 +66,20 @@ Object number; } +class FunctionTypeId : Id +{ + public static FunctionTypeId opCall(Id id, DeclT[] decls) + { + auto f = new FunctionTypeId(); + f.id = id; + f.decls = decls; + return f; + } + + Id id; + DeclT[] decls; +} + /** Represents a fully qualified name, with some packages and a final identifier. The identifier should always be set, but packages may have length 0. @@ -76,6 +99,18 @@ } } + /** + A few aliases to indicate what methods should be dealing with the same + types. + Not typesafe, and not using typedef because users would need a lot of + casts (and base type would be void*, so no possibility to synchronize, + print etc.) + */ +alias Object ExprT; +alias Object StmtT; /// ditto +alias Object DeclT; /// ditto +alias Object ModuleT; /// ditto + /** All methods are optional. @@ -84,18 +119,7 @@ */ abstract class Action { - /** - A few aliases to indicate what methods should be dealing with the same - types. - Not typesafe, and not using typedef because users would need a lot of - casts (and base type would be void*, so no possibility to synchronize, - print etc.) 
- */ - alias Object ExprT; - alias Object StmtT; /// ditto - alias Object DeclT; /// ditto - alias Object ModuleT; /// ditto // -- Modules -- @@ -146,7 +170,12 @@ The other solution is an addParamToFunc or similar. */ - DeclT actOnDeclarator(ref Id type, ref Id name, ExprT init) + DeclT actOnDeclarator(ref Id type, ref Id name, ExprT init, Attribute att) + { + return null; + } + + DeclT actOnAliasDecl(DeclT decl, Attribute att) { return null; } @@ -154,7 +183,39 @@ /** Add a struct member to a struct. */ - void actOnStructMember(DeclT st_decl, DeclT m_decl) //ref Id type, ref Id name, ExprT init) + void actOnStructMember(DeclT st_decl, DeclT m_decl) + { + return null; + } + + /** + Add a member to a class. + */ + void actOnClassMember(DeclT cl_decl, DeclT m_decl) + { + return null; + } + + /** + Add a base class to a class. + */ + void actOnClassBaseClass(DeclT cl_decl, ref Id name) + { + return null; + } + + /** + Add a member to an interface. + */ + void actOnInterfaceMember(DeclT if_decl, DeclT m_decl) + { + return null; + } + + /** + Add a base interface to an interface. + */ + void actOnInterfaceBaseClass(DeclT if_decl, ref Id name) { return null; } @@ -173,7 +234,7 @@ Called at the start of a function, doesn't get a lot of info - that is added later on, through addFuncArg and actOnEndOfFunction. 
*/ - DeclT actOnStartOfFunctionDef(ref Id type, ref Id name) + DeclT actOnStartOfFunctionDef(ref Id type, ref Id name, Attribute att) { return null; } @@ -247,21 +308,28 @@ /** */ + StmtT actOnForStmt(ref Token forTok, StmtT init, ExprT cond, ExprT incre, StmtT forBody) + { + return null; + } + + /** + */ StmtT actOnDeclStmt(DeclT decl) { return null; } - StmtT actOnStartOfSwitchStmt() + StmtT actOnStartOfSwitchStmt(Token _switch, ExprT exp) { return null; } - void actOnCaseStmt() + void actOnCaseStmt(StmtT stmt, Token _case, ExprT[] exps, StmtT[] stmts) { } - void actOnDefaultStmt() + void actOnDefaultStmt(StmtT stmt, Token _default, StmtT[] stmts) { } @@ -298,6 +366,14 @@ } /** + This is called when strings are used in expression + */ + ExprT actOnStringExp(Token t) + { + return null; + } + + /** Unary operator. */ ExprT actOnUnaryOp(Token op, ExprT operand) @@ -338,7 +414,7 @@ /** Called when function calls are encountered. */ - ExprT actOnIndexEpr(ExprT array, ref Token left_bracket, ExprT index, + ExprT actOnIndexExpr(ExprT array, ref Token left_bracket, ExprT index, ref Token right_bracket) { return null; @@ -351,6 +427,30 @@ { return null; } + + /** + New expression. + */ + ExprT actOnNewExpr(ref Id type, ExprT[] a_args, ExprT[] c_args) + { + return null; + } + + /** + Array Literal expression. + */ + ExprT actOnArrayLiteralExpr(ExprT[] exps, SLoc start, SLoc end) + { + return null; + } + + /** + Null expression. 
+ */ + ExprT actOnNullExpr(SLoc pos) + { + return null; + } } /** diff -r d3c148ca429b -r e0551773a005 src/parser/Parser.d --- a/src/parser/Parser.d Tue Aug 12 18:14:56 2008 +0200 +++ b/src/parser/Parser.d Tue Aug 12 18:19:34 2008 +0200 @@ -5,7 +5,8 @@ import parser.Action; -import basic.Message; +import basic.Message, + basic.Attribute; import basic.SmallArray, basic.SourceManager; @@ -34,91 +35,151 @@ this.action = act; Module m; - if (lexer.peek.type == Tok.Module) + if (peek.type == Tok.Module) { - Token _module = lexer.next; + Token _module = next(); ModuleName name = parseModuleName(); m = action.actOnModule(_module, sm.getText(name.asRange())); require(Tok.Seperator); } else { - SLoc loc = lexer.peek.location; + SLoc loc = peek.location; m = action.actOnImplicitModule(loc, sm.getFile(loc)); } - while (lexer.peek.type != Tok.EOF) - foreach (d; parseDeclDef()) + auto nes = parseAttributeInit; + while( !isa(Tok.EOF) ) + { + while ( peek.isAttribute ) + nes ~= parseAttribute(nes[$-1]); + + foreach (d; parseDeclDef(nes[$-1].a)) action.actOnModuleDecl(m, d); + nes = parseAttributeScope(nes); + } + return m; } private: - Decl[] parseDeclDef() + Decl[] parseDeclDef(Attribute a) { - Token t = lexer.peek; - if (t.type == Tok.Import) + if ( isa (Tok.Import) ) return parseImports(); - else - return [parseDecl()]; + + return [parseDecl(a)]; } - Decl parseDecl() + Decl parseDecl(Attribute att) { - Token t = lexer.peek; - - if (t.isBasicType || t.isIdentifier) + switch(peek.type) { - Id type; - Id iden; - int len = peekParseType; - if(lexer.peek(len).type == Tok.Identifier && len != 0) - { - type = parseType; -parseDeclAfterInvalidType: - iden = Id(require(Tok.Identifier)); - Token next = lexer.peek(); - if (next.type == Tok.Seperator) - { - Token sep = lexer.next(); - return action.actOnDeclarator(type, iden, null); - } - else if (next.type == Tok.Assign) + case Tok.Struct: + Id type = Id(next()); + Id iden = Id(require(Tok.Identifier)); + return parseStruct(type, iden, 
att); + + case Tok.Class: + Id type = Id(next()); + Id iden = Id(require(Tok.Identifier)); + return parseClass(type, iden, att); + + case Tok.Interface: + Id type = Id(next()); + Id iden = Id(require(Tok.Identifier)); + return parseInterface(type, iden, att); + + case Tok.Alias: + next(); + auto decl = parseDecl(Attribute()); + return action.actOnAliasDecl(decl, att); + + case Tok.Identifier: + Id type = parseType; + Id iden = Id(require(Tok.Identifier)); + + switch(peek.type) { - Token assign = lexer.next(); - Exp exp = parseExpression(); - require(Tok.Seperator); - return action.actOnDeclarator(type, iden, exp); + case Tok.Seperator: + Token sep = next(); + return action.actOnDeclarator(type, iden, null, att); + + case Tok.Assign: + Token assign = next(); + Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnDeclarator(type, iden, exp, att); + + case Tok.OpenParentheses: + return parseFunc(type, iden, att); + + default: + auto n1 = next(); + isEOF(type.tok); + messages.report(UnexpectedTok, n1.location).arg(n1.get(sm)); + return action.actOnDeclarator(type, iden, null, att); } - else if (next.type == Tok.OpenParentheses) - return parseFunc(type, iden); - else - messages.report(UnexpectedTok, next.location).arg(next.getType); - } - t = lexer.peek(len); - messages.report(InvalidDeclType, t.location) - .arg(sm.getText(t.asRange)); - while(len--) - lexer.next; - while(lexer.peek.type != Tok.Identifier) - lexer.next; - type = Id(lexer.peek); - goto parseDeclAfterInvalidType; + messages.report(InvalidDeclType, peek.location) + .arg(sm.getText(peek.asRange)); + + default: + if (peek.isBasicType) + goto case Tok.Identifier; + + messages.report(UnexpectedTok, peek.location) + .arg(sm.getText(peek.asRange)); + + next(); + return null; } - else if (t.type == Tok.Struct) - { - Id type = Id(lexer.next); - Id iden = Id(require(Tok.Identifier)); - - return parseStruct(type, iden); - } - messages.report(UnexpectedTok, t.location) - .arg(t.getType) + 
messages.report(UnexpectedTok, peek.location) + .arg(peek.get(sm)) .arg(Tok.Identifier) .fatal(ExitLevel.Parser); } + Extern parseLinkageType() + { + Extern e = Extern.D; + if(peek(1).type != Tok.OpenParentheses) + return e; + + next(); next(); + + Token t = require(Tok.Identifier); + + switch(sm.getText(t.asRange)) + { + case "C": + if (peek(0).type == Tok.Plus && + peek(1).type == Tok.Plus) + { next(); next(); e = Extern.CPlusPlus; } /* consume the two '+' tokens so ')' is checked next */ + else + e = Extern.C; + break; + case "D": + break; + case "Windows": + e = Extern.Windows; + break; + case "Pascal": + e = Extern.Pascal; + break; + case "System": + e = Extern.System; + break; + default: + messages.report(UnexpectedLinkType, t.location); + } + + if (!isa(Tok.CloseParentheses)) + messages.report(UnexpectedTokSingle, peek.location); + + return e; + } + /** Parse a series of imports belonging to a single import token. */ @@ -129,16 +190,16 @@ void addToRes(Decl d) { res ~= d; } bool done = false; - while (!done && !on_a(Tok.Seperator)) + while (!done && !isa(Tok.Seperator)) { ModuleName mod = parseModuleName(); - Token tok = lexer.peek; + Token tok = peek; switch (tok.type) { case Tok.Comma: // import A, B.C; // parse another module-name - lexer.next(); + next(); res ~= action.actOnImport(_import, mod, null); break; case Tok.Assign: @@ -152,7 +213,7 @@ } //if (isStatic) // error("Static imports cannot be renamed"); - lexer.next(); + next(); Id name = mod.id; mod = parseModuleName(); // create from mod and rename to `name` @@ -161,7 +222,7 @@ case Tok.Colon: // import A : a; // selective imports, potentially import A : print = a - lexer.next(); + next(); Decl d = action.actOnImport(_import, mod, null); // do-while on a comma: // add explicit symbol @@ -194,42 +255,147 @@ require(Tok.Seperator); return res.safe(); Lerror: - while (!on_a (Tok.Seperator)) - lexer.next(); + while (!isa (Tok.Seperator)) + next(); return res.safe(); } /** + Parse interface + */ + Decl parseInterface(Id 
type, Id iden, Attribute att) + { + auto decl = 
action.actOnDeclarator(type, iden, null, att); + + if (peek.type == Tok.Colon) + // SuperInterfaces + { + next(); // Remove colon. + + Id identifier; + + // The identifier + identifier = Id(require(Tok.Identifier)); + + action.actOnInterfaceBaseClass(decl, identifier); + + // We should now have an optional list of items, each starting ',' + while (peek.type == Tok.Comma) + { + next(); // Remove comma + + // The identifier + identifier = Id(require(Tok.Identifier)); + + action.actOnInterfaceBaseClass(decl, identifier); + } + } + + require(Tok.OpenBrace); + + auto nes = parseAttributeInit; + while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) ) + { + while ( peek.isAttribute ) + nes ~= parseAttribute(nes[$-1]); + + auto m_decl = parseDecl(nes[$-1].a); + action.actOnInterfaceMember(decl, m_decl); + + nes = parseAttributeScope(nes); + } + + require(Tok.CloseBrace); + + return decl; + } + + /** + Parse class + */ + Decl parseClass(Id type, Id iden, Attribute att) + { + auto decl = action.actOnDeclarator(type, iden, null, att); + + if (peek.type == Tok.Colon) + // BaseClassList - Super class and interfaces(in that order) + { + next(); // Remove colon. + + Token protection; + Id identifier; + + // First we expect an optional protection level. + if (peek.isBaseClassProtection) + protection = next(); + // Then the identifier + identifier = Id(require(Tok.Identifier)); + + action.actOnClassBaseClass(decl, identifier); + + // We should now have an optional list of items, each starting ',' + while (peek.type == Tok.Comma) + { + next(); // Remove comma + + // First we expect an optional protection level. 
+ if (peek.isBaseClassProtection) + protection = next(); + // Then the identifier + identifier = Id(require(Tok.Identifier)); + + action.actOnClassBaseClass(decl, identifier); + } + } + + require(Tok.OpenBrace); + + auto nes = parseAttributeInit; + while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) ) + { + while ( peek.isAttribute ) + nes ~= parseAttribute(nes[$-1]); + + switch(peek.type) + { + case Tok.This: + auto id = Id(next); + auto m_decl = parseFunc(iden, id, nes[$-1].a); + action.actOnClassMember(decl, m_decl); + break; + + default: + auto m_decl = parseDecl(nes[$-1].a); + action.actOnClassMember(decl, m_decl); + } + + nes = parseAttributeScope(nes); + } + + require(Tok.CloseBrace); + + return decl; + } + + /** Parse struct */ - Decl parseStruct(Id type, Id iden) + Decl parseStruct(Id type, Id iden, Attribute att) { - auto decl = action.actOnDeclarator(type, iden, null); + auto decl = action.actOnDeclarator(type, iden, null, att); require(Tok.OpenBrace); - while(lexer.peek.isBasicType || lexer.peek.isIdentifier) + auto nes = parseAttributeInit; + while( !isa(Tok.EOF) && !isa(Tok.CloseBrace) ) { - auto m_decl = parseDecl(); + while ( peek.isAttribute ) + nes ~= parseAttribute(nes[$-1]); + + auto m_decl = parseDecl(nes[$-1].a); action.actOnStructMember(decl, m_decl); -/* Id var_type = Id(lexer.next); - Id var_iden = Id(require(Tok.Identifier)); - Token next = lexer.peek(); - if (next.type == Tok.Seperator) - { - Token sep = lexer.next(); - action.actOnStructMember(decl, var_type, var_iden, null); - continue; - } - else if (next.type == Tok.Assign) - { - Token assign = lexer.next(); - Exp exp = parseExpression(); - require(Tok.Seperator); - action.actOnStructMember(decl, var_type, var_iden, exp); - continue; - } - messages.report(UnexpectedTok, next.location).arg(next.getType);*/ + + nes = parseAttributeScope(nes); } require(Tok.CloseBrace); @@ -237,6 +403,125 @@ return decl; } + Att[] parseAttributeInit() + { + Att[] nes; + nes ~= Att(); + nes[0].nested = Scope; + 
return nes; + } + + Att[] parseAttributeScope(Att[] nes) + { + while ( nes[$-1].nested == Single ) + nes.length = nes.length - 1; + + while ( isa(Tok.CloseBrace) && nes.length > 1) + { + while ( nes.length > 1 ) + { + if( nes[$-1].nested == Scope ) + { + nes.length = nes.length - 1; + next(); + break; + } + nes.length = nes.length - 1; + } + } + + return nes; + } + + Att parseAttribute(Att last) + { + Att _parseAttribute(Att last) + { + Att a = last; + a.nested = Single; + + switch(peek.type) + { + case Tok.Public: + a.a.setProtection(Protection.Public); + break; + case Tok.Private: + a.a.setProtection(Protection.Private); + break; + case Tok.Package: + a.a.setProtection(Protection.Package); + break; + case Tok.Protected: + a.a.setProtection(Protection.Protected); + break; + case Tok.Export: + a.a.setProtection(Protection.Export); + break; + case Tok.Static: + a.a.setStatic; + break; + case Tok.Final: + a.a.setFinal; + break; + case Tok.Const: + a.a.setConst; + break; + case Tok.Abstract: + a.a.setAbstract; + break; + case Tok.Override: + a.a.setOverride; + break; + case Tok.Deprecated: + a.a.setDeprecated; + break; + case Tok.Auto: + a.a.setAuto; + break; + case Tok.Extern: + Extern e = parseLinkageType; + a.a.setExtern(e); + break; + } + next(); + + return a; + } + + Att a = _parseAttribute(last); + + while (peek.isAttribute) + { + a = parseAttribute(a); + } + + if (peek.type == Tok.Colon) + { + a.nested = All; + next(); + } + else if (peek.type == Tok.OpenBrace) + { + a.nested = Scope; + next(); + } + + return a; + } + + enum : uint + { + Single, + Scope, + All + } + + struct Att + { + Attribute a; + uint nested; + } + /** Parse statements. 
@@ -244,26 +529,18 @@ */ Stmt parseStatement() { - Token t = lexer.peek; - - switch(t.type) + switch (peek.type) { case Tok.Return: - Token ret = lexer.next; + Token ret = next(); Exp exp; - if (lexer.peek.type != Tok.Seperator) + if (peek.type != Tok.Seperator) exp = parseExpression(); require(Tok.Seperator); return action.actOnReturnStmt(ret, exp); - /* - if (cond) - single statement | compound statement - [else - single statement | compound statement] - */ case Tok.If: - Token _if = lexer.next(); + Token _if = next(); require(Tok.OpenParentheses); Exp cond = parseExpression(); @@ -275,107 +552,185 @@ // something than can be passed along Token _else = _if; Stmt elseB; - if (lexer.peek.type == Tok.Else) + if (peek.type == Tok.Else) { - _else = lexer.next; + _else = next(); elseB = parseSingleOrCompoundStatement(); } - return action.actOnIfStmt(_if, cond, thenB, _else, elseB); - /* - while (cond) - single statement | compound statement - */ case Tok.While: - Token _while = lexer.next; + Token _while = next(); require(Tok.OpenParentheses); Exp cond = parseExpression(); require(Tok.CloseParentheses); Stmt bodyStmt = parseSingleOrCompoundStatement(); return action.actOnWhileStmt(_while, cond, bodyStmt); - /* - One of four things: - A declaration of a function/variable `type id ...` - A direct assignment `id = exp;` - An indirect assignment `id.id = exp` - Some sort of free standing expression + case Tok.For: + Token _for = next(); + require(Tok.OpenParentheses); + Stmt init; + if ( isa(Tok.Seperator)) + require(Tok.Seperator); + else + init = parseStatement(); + + Exp cond; + if ( !isa(Tok.Seperator)) + cond = parseExpression(); + require(Tok.Seperator); - The assignments should be handled as binary expressions? 
- */ - case Tok.Identifier: - Token iden = lexer.peek; - Token n = lexer.peek(1); - // Must be an decl, if we start with a basic type, or two - // identifiers in a row - if (iden.isBasicType() || iden.isIdentifier()) + Exp incre; + if ( !isa(Tok.CloseParentheses)) + incre = parseExpression(); + require(Tok.CloseParentheses); + + Stmt bodyStmt = parseSingleOrCompoundStatement(); + return action.actOnForStmt(_for, init, cond, incre, bodyStmt); + + case Tok.Switch: + auto t = next(); + require(Tok.OpenParentheses); + auto target = parseExpression(); + auto res = action.actOnStartOfSwitchStmt(t, target); + require(Tok.CloseParentheses); + require(Tok.OpenBrace); + while (true) { - if ( n.type == Tok.Star || n.type == Tok.OpenBracket) + Stmt[] statements; + if (isa(Tok.Default)) { - int len = peekParseType; - if(lexer.peek(len).type == Tok.Identifier && len != 0) - return action.actOnDeclStmt(parseVarDecl()); + Token _default = next(); + require(Tok.Colon); + statements.length = 0; + while (peek.type != Tok.Case + && peek.type != Tok.Default + && peek.type != Tok.CloseBrace) + statements ~= parseStatement(); + action.actOnDefaultStmt(res, _default, statements); + continue; + } + + Token _case = peek; + if (_case.type != Tok.Case) + break; + next(); + + Exp[] literals; + do + { + Exp e = parseExpression(); + literals ~= e; + } + while (skip(Tok.Comma)); + require(Tok.Colon); - Exp exp = parseExpression(); - require(Tok.Seperator); - return action.actOnExprStmt(exp); - } - - if (n.isIdentifier()) - return action.actOnDeclStmt(parseVarDecl()); + while (peek.type != Tok.Case + && peek.type != Tok.Default + && peek.type != Tok.CloseBrace) + statements ~= parseStatement(); + + action.actOnCaseStmt(res, _case, literals, statements); - // Expression: a.b, a = b, a(b) etc. 
- Exp exp = parseExpression(); - require(Tok.Seperator); - return action.actOnExprStmt(exp); + if (peek.type == Tok.CloseBrace) + break; + } + require(Tok.CloseBrace); + return res; + + case Tok.Star: + auto exp = parseExpression(); + require(Tok.Seperator); + return action.actOnExprStmt(exp); + + case Tok.Identifier: + // If it's a '*' it must be a method. Otherwise it won't give + // any sense. + + if (isa(Tok.Function, 1) || + isa(Tok.Identifier, 1) || + isa(Tok.Star, 1)) + { + Attribute a; + return action.actOnDeclStmt(parseDecl(a)); } - case Tok.Switch: - messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType); - return null; + if (isa(Tok.OpenBracket, 1)) + { + int i = 1; + while (isa(Tok.OpenBracket, i) || + isa(Tok.Star, i) || + isa(Tok.Identifier, i)) + { + if (isa(Tok.Identifier, i)) + return action.actOnDeclStmt(parseDecl(Attribute())); + + i++; + if (isa(Tok.Star,i-1)) + continue; + // Must be OpenBracket here.. + + if (isa(Tok.Integer, i)) + i++; + else + if (isa(Tok.CloseBracket, i)) + return action.actOnDeclStmt(parseDecl(Attribute())); + else + i++; + + if (!isa(Tok.CloseBracket, i)) + break; + i++; + } + if (isa(Tok.Function, i)) + return action.actOnDeclStmt(parseDecl(Attribute())); + } + + // Expression: a.b, a = b, a(b) etc. 
+ Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnExprStmt(exp); + + case Tok.Void: // And all basic types + return action.actOnDeclStmt(parseVarDecl()); default: - if (t.isBasicType()) - goto case Tok.Identifier; - if (t.type == Tok.Star) - { - auto exp = parseExpression(); - require(Tok.Seperator); - return action.actOnExprStmt(exp); - } - messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType); + if (peek.isBasicType) + goto case Tok.Void; + + messages.report(UnexpectedBeginStmt, peek.location).arg(peek.get(sm)); + require(Tok.Seperator); return null; } - messages.report(UnexpectedTok, t.location); - return null; } Decl parseVarDecl() { // manually hardcoded to only support "type id [= exp];" // as that is the only thing the codegen understands - Id type = parseType; - Id id = Id(lexer.next); + Id type = parseType(); + Id id = Id(next()); Exp init; if (skip(Tok.Assign)) init = parseExpression(); require(Tok.Seperator); - Decl d = action.actOnDeclarator(type, id, init); + Attribute att; + Decl d = action.actOnDeclarator(type, id, init, att); return d; } /** Parses a function/method given the already parsed return type and name */ - Decl parseFunc(ref Id type, ref Id name) + Decl parseFunc(ref Id type, ref Id name, Attribute att) { - Decl func = action.actOnStartOfFunctionDef(type, name); + Decl func = action.actOnStartOfFunctionDef(type, name, att); parseFuncArgs(func); - if(lexer.peek.type == Tok.Seperator) + if(peek.type == Tok.Seperator) { - lexer.next; + next(); return func; } Stmt stmt = parseCompoundStatement(); @@ -392,16 +747,16 @@ { require(Tok.OpenParentheses); // Remove the "(" token. 
- while(lexer.peek.type != Tok.CloseParentheses) + while(peek.type != Tok.CloseParentheses) { auto t = parseType(); Id i; - if(lexer.peek.type == Tok.Identifier) + if(peek.type == Tok.Identifier) i = parseIdentifier(); action.addFuncArg(func, t, i); - if(lexer.peek.type == Tok.Comma) - lexer.next; + if(peek.type == Tok.Comma) + next(); } require(Tok.CloseParentheses); // Remove the ")" @@ -413,7 +768,7 @@ */ Stmt parseSingleOrCompoundStatement() { - if (lexer.peek.type == Tok.OpenBrace) + if (peek.type == Tok.OpenBrace) return parseCompoundStatement(); return parseStatement(); } @@ -421,14 +776,14 @@ /** Parses a function-body or similar, expects an opening brace to be the current token. - + Will consume both the starting { and ending } */ Stmt parseCompoundStatement() { Token lbrace = require(Tok.OpenBrace); SmallArray!(Stmt, 32) stmts; // Try to use the stack only - while (lexer.peek.type != Tok.CloseBrace) + while ( !isa(Tok.CloseBrace) && !isa(Tok.EOF) ) stmts ~= parseStatement(); Token rbrace = require(Tok.CloseBrace); return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe()); @@ -436,13 +791,13 @@ Id parseIdentifier() { - Token tok = lexer.next; + Token tok = next(); if (tok.type is Tok.Identifier) return Id(tok); messages.report(UnexpectedTokSingle, tok.location) - .arg(tok.getType) + .arg(tok.get(sm)) .arg(Tok.Identifier); } @@ -453,8 +808,8 @@ while (skip(Tok.Dot)) { mod.packages ~= id; - if (lexer.peek.type != Tok.Identifier) { - messages.report(ExpectedIdAfterPackage, lexer.peek.location); + if (peek.type != Tok.Identifier) { + messages.report(ExpectedIdAfterPackage, peek.location); goto Lerror; } id = parseIdentifier(); @@ -463,7 +818,7 @@ return mod; Lerror: while (!skip(Tok.Seperator)) - lexer.next(); + next(); return mod; } @@ -473,7 +828,7 @@ */ Id parseType() { - Token type = lexer.next; + Token type = next(); Id currentType; @@ -481,92 +836,81 @@ messages.report(InvalidType, type.location); currentType = Id(type); - type = lexer.peek; - 
while(type.type == Tok.Star || type.type == Tok.OpenBracket) + while(true) { - if(type.type == Tok.Star) - { - currentType = PointerId(currentType); - lexer.next; - } - else + switch(peek.type) { - lexer.next; - if(lexer.peek.type == Tok.Integer) - currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer))); - require(Tok.CloseBracket); - - } - type = lexer.peek; - } + case Tok.Star: + currentType = PointerTypeId(currentType); + next(); + break; + case Tok.OpenBracket: + next(); + if (isa(Tok.Integer)) + currentType = StaticArrayTypeId( + currentType, + action.actOnNumericConstant( + require(Tok.Integer))); + require(Tok.CloseBracket); + break; + case Tok.Function: + next(); - return currentType; - } - int peekParseType() - { - int i; - Token type = lexer.peek(i); - - Id currentType; + require(Tok.OpenParentheses); // Remove the "(" token. - if ( !(type.isBasicType || type.type == Tok.Identifier) ) - return 0; + DeclT[] decls; - currentType = Id(type); - type = lexer.peek(++i); + while(peek.type != Tok.CloseParentheses) + { + auto t = parseType(); + Id i; + if(peek.type == Tok.Identifier) + i = parseIdentifier(); - while(type.type == Tok.Star || type.type == Tok.OpenBracket) - { - if(type.type == Tok.Star) - { - i++; + // Act on function type param + decls ~= action.actOnDeclarator(t, i, null, Attribute()); + + if(peek.type == Tok.Comma) + next(); + } + + currentType = FunctionTypeId(currentType, decls); + + require(Tok.CloseParentheses); // Remove the ")" + break; + default: + goto end; } - else - { - if(lexer.peek(i++).type != Tok.OpenBracket) - return 0; - if(lexer.peek(i).type == Tok.Integer) - { - i++; - if(lexer.peek(i++).type != Tok.CloseBracket) - return 0; - } - else - if(lexer.peek(i++).type != Tok.CloseBracket) - return 0; - - } - type = lexer.peek(i); } - - return i; +end: + return currentType; } private: // -- Expression parsing -- // Exp parsePostfixExp(Exp target) { - switch(lexer.peek.type) + switch(peek.type) { case 
Tok.Dot: - switch(lexer.peek(1).type) + switch(peek(1).type) { case Tok.Identifier: - Token op = lexer.next; - Id member = Id(lexer.next); + Token op = next(); + Id member = Id(next()); Exp exp = action.actOnMemberReference(target, op.location, member); return parsePostfixExp(exp); default: - Token t = lexer.peek(1); + Token t = peek(1); messages.report(ExpectedIdAfterDot, t.location); } case Tok.OpenBracket: - Token open = lexer.next; + Token open = next(); Exp index = parseExpression(); Token close = require(Tok.CloseBracket); - return action.actOnIndexEpr(target, open, index, close); + return action.actOnIndexExpr(target, open, index, close); default: return target; } @@ -575,15 +919,15 @@ Exp parseExpression(int p = 0) { auto exp = P(); - Token next = lexer.peek(); + Token n = peek(); BinOp* op = null; - while ((op = binary(next.type)) != null && op.prec >= p) + while ((op = binary(n.type)) != null && op.prec >= p) { - lexer.next(); + next(); int q = op.leftAssoc? 1 + op.prec : op.prec; auto exp2 = parseExpression(q); - exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2); - next = lexer.peek(); + exp = action.actOnBinaryOp(n.location, op.operator, exp, exp2); + n = peek(); } return exp; @@ -591,46 +935,109 @@ Exp P() { - Token next = lexer.next(); - if (auto op = unary(next.type)) - return action.actOnUnaryOp(next, parseExpression(op.prec)); - else if (next.type == Tok.OpenParentheses) + Token n = next(); + if (auto op = unary(n.type)) + return action.actOnUnaryOp(n, parseExpression(op.prec)); + else if (n.type == Tok.OpenParentheses) { auto e = parseExpression(0); require(Tok.CloseParentheses); return e; } - else if (next.type == Tok.Identifier) + else if (n.type == Tok.Identifier) { - Exp value = action.actOnIdentifierExp(Id(next)); + Exp value = action.actOnIdentifierExp(Id(n)); Exp iden = parsePostfixExp(value); - switch(lexer.peek.type) + switch(peek.type) { case Tok.OpenParentheses: - Token lp = lexer.next; + Token lp = next(); 
SmallArray!(Exp, 8) args; - while(lexer.peek.type != Tok.CloseParentheses) + while(peek.type != Tok.CloseParentheses) { - if(lexer.peek.type == Tok.Comma) - lexer.next; + if(peek.type == Tok.Comma) + next(); args ~= parseExpression(); } - Token rp = lexer.next(); + Token rp = next(); return action.actOnCallExpr(iden, lp, args.unsafe(), rp); default: return iden; } } - else if (next.type == Tok.Cast) - return parseCast(next); - else if (next.type == Tok.Integer) - return action.actOnNumericConstant(next); - else if (next.type == Tok.String) - return action.actOnStringExp(next); + else if (n.type == Tok.Null) + return action.actOnNullExpr(n.location); + else if (n.type == Tok.Cast) + return parseCast(n); + else if (n.type == Tok.Integer) + return action.actOnNumericConstant(n); + else if (n.type == Tok.String) + return action.actOnStringExp(n); + else if (n.type == Tok.OpenBracket) + { + // Array literals + Exp[] exps; + exps ~= parseExpression(); + + while (isa(Tok.Comma)) + { + next(); + + if (isa(Tok.CloseBracket)) + break; + exps ~= parseExpression(); + } + scope e = require(Tok.CloseBracket); + return action.actOnArrayLiteralExpr(exps, n.location, e.location); + } + else if (n.type == Tok.New) + { + Exp[] allocator_args; + Exp[] constructor_args; + + if ( isa(Tok.OpenParentheses)) + { + next(); // Remove OpenParentheses - messages.report(ExpectedExp, next.location) + if ( !isa(Tok.CloseParentheses ) ) + { + allocator_args ~= parseExpression; + + while ( isa(Tok.Comma) ) + { + next(); // Remove Comma + + allocator_args ~= parseExpression; + } + } + require(Tok.CloseParentheses); + } + + auto type = parseType; + + if ( isa(Tok.OpenParentheses)) + { + next(); // Remove OpenParentheses + + if ( !isa(Tok.CloseParentheses ) ) + { + constructor_args ~= parseExpression; + + while ( isa(Tok.Comma) ) + { + next(); // Remove Comma + + constructor_args ~= parseExpression; + } + } + require(Tok.CloseParentheses); + } + return action.actOnNewExpr(type, allocator_args, 
constructor_args); + } + + messages.report(ExpectedExp, n.location) .fatal(ExitLevel.Parser); return null; } @@ -638,13 +1045,13 @@ Exp parseCast(ref Token _cast) { require(Tok.OpenParentheses); - auto next = lexer.next; - if(!next.isBasicType && !next.isIdentifier) - messages.report(ExpectedCastType, next.location); - + auto n = next(); + if(!n.isBasicType && !n.isIdentifier) + messages.report(ExpectedCastType, n.location); + require(Tok.CloseParentheses); auto exp = P(); - return action.actOnCastExpr(_cast, Id(next), exp); + return action.actOnCastExpr(_cast, Id(n), exp); } struct UnOp @@ -654,9 +1061,10 @@ } static const UnOp[] _unary = - [ + [ {Tok.Minus, 4}, - {Tok.Star, 4} + {Tok.Star, 4}, + {Tok.And, 4} ]; UnOp* unary(Tok t) { @@ -675,8 +1083,28 @@ } static const BinOp[] _binary = - [ - {Tok.Assign, 1, false, Operator.Assign}, + [ + {Tok.Assign, 1, false, Operator.Assign}, + {Tok.PlusAssign, 1, false, Operator.AddAssign}, + {Tok.MinusAssign, 1, false, Operator.SubAssign}, + {Tok.StarAssign, 1, false, Operator.MulAssign}, + {Tok.SlashAssign, 1, false, Operator.DivAssign}, + {Tok.PercentAssign, 1, false, Operator.ModAssign}, + + // =, += etc. 
1 + // (need special-case for the ternary operator at this level) + // ||, 2 + // &&, 3 + // |, 4 + // &, 5 + // ^, 6 + // ==, !=, is, !is, 7 + // <, <= etc, 7 + // in, 7 + // <<, >>, >>>, 8 + // +, -, ~, 9 + // *, /, %, 10 + // unary operators here {Tok.Eq, 2, true, Operator.Eq}, {Tok.Ne, 2, true, Operator.Ne}, @@ -691,7 +1119,11 @@ {Tok.Star, 5, true, Operator.Mul}, {Tok.Slash, 5, true, Operator.Div}, - {Tok.Percent, 5, true, Operator.Mod} + {Tok.Percent, 5, true, Operator.Mod}, + + {Tok.LeftShift, 8, true, Operator.LeftShift}, + {Tok.RightShift, 8, true, Operator.RightShift}, + {Tok.UnsignedRightShift, 8, true, Operator.UnsignedRightShift} ]; BinOp* binary(Tok t) { @@ -705,27 +1137,53 @@ Token require(Tok t) { - if (lexer.peek().type != t) - messages.report(UnexpectedTokSingle, lexer.peek.location) - .arg(lexer.peek.getType) - .arg(t); - return lexer.next(); + if (!isa(t)) + if(isa(Tok.EOF)) + messages.report(UnexpectedEOF, + [lexer.last.asRange][], []) + .arg(lexer.last.get(sm)) + .fatal(ExitLevel.Parser); + else + messages.report(UnexpectedTokSingle, peek.location) + .arg(peek.get(sm)) + .arg(typeToString[t]); + return next(); } bool skip(Tok t) { - if (lexer.peek().type != t) + if (peek().type != t) return false; - lexer.next(); + next(); return true; } - bool on_a(Tok t) + bool isa(Tok t, int i = 0) { - return lexer.peek.type == t; + return peek(i).type == t; } - Lexer lexer; + bool isEOF(Token t) + { + if (isa(Tok.EOF)) + messages.report(UnexpectedEOF, + [t.asRange][], []) + .arg(t.get(sm)) + .fatal(ExitLevel.Parser); + return false; + } + + Token next() + { + return lexer.next; + } + + Token peek(int i = 0) + { + return lexer.peek(i); + } + + Lexer lexer; SourceManager sm; }