# HG changeset patch # User Anders Johnsen # Date 1211722964 -7200 # Node ID c658172ca8a0a747fd4a9547347fc788956217e7 # Parent 2deb4c1f0d93f4ca28c4f143dd4a2a5bb0c5cbf8 Parsing basic integers and floats. diff -r 2deb4c1f0d93 -r c658172ca8a0 ast/Exp.d --- a/ast/Exp.d Sun May 25 14:56:05 2008 +0200 +++ b/ast/Exp.d Sun May 25 15:42:44 2008 +0200 @@ -301,7 +301,7 @@ { super(ExpType.IntegerLit, loc); range = SourceRange(loc, loc + t.length); - this.name = substitute(t, "_", ""); + this.name = t; /* keeps the literal text as written; the '_' stripping done by the removed line no longer happens here */ } char[] get() diff -r 2deb4c1f0d93 -r c658172ca8a0 basic/LiteralParsing.d --- a/basic/LiteralParsing.d Sun May 25 14:56:05 2008 +0200 +++ b/basic/LiteralParsing.d Sun May 25 15:42:44 2008 +0200 @@ -1,32 +1,279 @@ module basic.LiteralParsing.d; import basic.SourceLocation, - basic.Message; + basic.Message, + basic.conv; import tango.io.Stdout, tango.core.BitManip, Integer = tango.text.convert.Integer, + Utf = tango.text.convert.Utf, tango.text.Util; -enum StructType +enum StringType { Char, WChar, DChar } +enum NumberType +{ + Int, + UInt, + Long, + ULong, + Float, + Double, + Real +} + struct String { - StructType type; + StringType type; ubyte[] data; } +struct Number +{ + NumberType type; + ulong integer; + real floating; +} + private struct EscapeReturn { ubyte[] data; int length; } +private struct NumberReturn +{ + char[] data; + int length; +} + +/* Parses the numeric literal in str. Text containing '.', 'e' or 'E' is scanned by parseRealNumber and converted with toReal (type Double, or Real when the value exceeds double.max); all other text is scanned by parseDecimalDigits and converted with toUlong (type Long above uint.max, ULong above long.max, otherwise left at its initial value). Conversion failures are reported as FloatingToLarge or IntegerToLarge; a first character that is not a decimal digit is reported as InvalidStartInteger. NOTE(review): str[0] is read without a length check, and no suffix letters are inspected here; confirm both are intended. */ Number parseNumber(char[] str, SourceLocation loc, MessageHandler messages) +{ + Number num; + + switch(str[0]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if(str.contains('.') || str.contains('e') || str.contains('E')) + { + auto n = parseRealNumber(str, loc, messages); + + try + { + num.floating = toReal(n.data); + num.type = NumberType.Double; + } + catch(Exception e) + { + num.floating = real.init; + messages.report(FloatingToLarge, loc, loc + n.length - 1); + } + + if(num.floating > double.max) + num.type = NumberType.Real; + } + else + { + auto 
n = parseDecimalDigits(str, loc, messages); + + try + { + num.integer = toUlong(n.data); + } + catch(Exception e) + { + num.integer = 0; + messages.report(IntegerToLarge, loc, loc + n.length - 1); + } + + if(num.integer > uint.max) + num.type = NumberType.Long; + if(num.integer > long.max) + num.type = NumberType.ULong; + } + break; + default: + messages.report(InvalidStartInteger, loc, loc+1); + } + +// printNumber(str, num); + return num; +} + +/* Collects the decimal digits at the start of str into data, skipping '_' separators, and stops at the first other character or at the end of str; length is set to i-1 after the loop. loc and messages are not used. NOTE(review): when the stopping character is the last character of str, i is incremented twice, so length then equals str.length and includes that character; confirm intended. */ NumberReturn parseDecimalDigits(char[] str, SourceLocation loc, MessageHandler messages) +{ + int i = 0; + + char[] number; + + bool end; + while(!end) + { + switch(str[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + number ~= str[i]; + break; + case '_': + break; + default: + end = true; + } + i++; + if(str.length == i) + { + end = true; + i++; + } + } + + NumberReturn res; + res.length = i - 1; + res.data = number; + + return res; +} + +/* Scans a floating-point literal from str and returns the cleaned text in data and the scan position in length. Digit and '_' runs are delegated to parseDecimalDigits; a second '.', a '.' after the exponent marker, a second 'e'/'E', and a sign that does not directly follow 'e'/'E' are reported and left out of data. If the collected text ends in a sign or exponent marker, FloatingInvalidEnd is reported and an empty NumberReturn is returned. NOTE(review): number[$-1] is read without checking that number is non-empty; confirm callers always pass text starting with a digit. */ NumberReturn parseRealNumber(char[] str, SourceLocation loc, MessageHandler messages) +{ + int i = 0; + + bool dot, e; + char[] number; + + NumberReturn num; + + bool end; + while(!end) + { + switch(str[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + auto n = parseDecimalDigits(str[i..$], loc, messages); + number ~= n.data; + i += n.length; + break; + case '.': + if(e) + messages.report(FloatingDotInE, loc + i, loc + i + 1); + else if(dot) + messages.report(OnlyOneDotFloating, loc + i, loc + i + 1); + else + { + dot = true; + number ~= str[i]; + } + i++; + break; + case 'e': + case 'E': + if(e) + messages.report(OnlyOneEFloating, loc + i, loc + i + 1); + else + { + e = true; + number ~= str[i]; + } + i++; + break; + case '+': + case '-': + if (number[$-1] != 'e' && + number[$-1] != 'E') + messages.report(FloatingBadLocation, loc + i, loc + i + 1) + .arg(str[i]); + else + number ~= str[i]; + i++; + break; + default: 
+ end = true; + } + if(str.length == i) + end = true; + } + + if (number[$-1] == '+' || + number[$-1] == '-' || + number[$-1] == 'e' || + number[$-1] == 'E') + { + messages.report(FloatingInvalidEnd, loc + i - 1, loc + i); + return num; + } + + num.data = number; + num.length = i; + + return num; +} + + +/* Writes str, then the parsed value and the name of its NumberType, to Stdout. */ void printNumber(char[] str, Number num) +{ + Stdout(str)(" have become").newline; + switch(num.type) + { + case NumberType.Int: + Stdout(num.integer)(" of type ")("int"); + break; + case NumberType.UInt: + Stdout(num.integer)(" of type ")("uint"); + break; + case NumberType.Long: + Stdout(num.integer)(" of type ")("long"); + break; + case NumberType.ULong: + Stdout(num.integer)(" of type ")("ulong"); + break; + case NumberType.Float: + Stdout(num.floating)(" of type ")("float"); + break; + case NumberType.Double: + Stdout(num.floating)(" of type ")("double"); + break; + case NumberType.Real: + Stdout(num.floating)(" of type ")("real"); + break; + } + Stdout().newline; +} + + String parseString(char[] str, SourceLocation loc, MessageHandler messages) { String strBuf; @@ -76,9 +323,7 @@ hexBuf.length = 0; } } - else if(whitespace.contains(str[i])) - {} - else + else if(!whitespace.contains(str[i])) messages.report(InvalidHexStrChar, loc + i, loc + i + 1); i++; @@ -88,7 +333,7 @@ return strBuf; } - +// String parseDoubleQuotedString(char[] str, String strBuf, SourceLocation loc, MessageHandler messages) @@ -112,6 +357,22 @@ break; } + if(str.length > i + 1) // Then we have a postfix. Lexer makes sure this is c, w or d. 
+ switch(str[i+1]) + { + case 'c': + break; + case 'w': + strBuf.data = cast(ubyte[])Utf.toString16(cast(char[])strBuf.data); + strBuf.type = StringType.WChar; + break; + case 'd': + strBuf.data = cast(ubyte[])Utf.toString32(cast(char[])strBuf.data); + strBuf.type = StringType.DChar; + break; + } + + return strBuf; } @@ -261,6 +522,31 @@ uint i = Integer.toLong(octBuf, 8); res.data ~= i; break; + case '&': /* named character entity: collects the characters from index 2 up to ';' and looks the result up in characterEntities. NOTE(review): the break on '"' appears to leave only the while loop, so the lookup below still runs and res.length = 2 is overwritten by i+1; confirm. */ + int i = 2; + char[] s; + while(str[i] != ';') + { + if(str[i] == '"') + { + messages.report(NoCharEntityEnd, loc+i, loc+i+1); + res.length = 2; + break; + } + s ~= str[i]; + i++; + } + + if ( s in characterEntities ) + { + res.data ~= parseToUtf8(characterEntities[s]); + } + else + messages.report(InvalidCharEntity, loc + 2, loc + i); + + res.length = i + 1; // remember the ; + + break; default: messages.report(InvalidStrEscape, loc, loc + 2); res.length += 2; @@ -337,17 +623,278 @@ char[] s; switch(strBuf.type) { - case StructType.Char: + case StringType.Char: Stdout(str)(" have become").newline() (cast(char[])strBuf.data).newline; break; - case StructType.WChar: + case StringType.WChar: Stdout(str)(" have become").newline() (cast(wchar[])strBuf.data).newline; break; - case StructType.DChar: + case StringType.DChar: Stdout(str)(" have become").newline() (cast(dchar[])strBuf.data).newline; break; } } + +static ushort[char[]] characterEntities; /* maps an entity name to the numeric value that the '&' escape case passes to parseToUtf8; filled in by the static constructor below */ + +static this() +{ + characterEntities = + [ + "quot"[]: 34, + "amp": 38, + "lt": 60, + "gt": 62, + "OElig": 338, + "oelig": 339, + "Scaron": 352, + "scaron": 353, + "Yuml": 376, + "circ": 710, + "tilde": 732, + "ensp": 8194, + "emsp": 8195, + "thinsp": 8201, + "zwnj": 8204, + "zwj": 8205, + "lrm": 8206, + "rlm": 8207, + "ndash": 8211, + "mdash": 8212, + "lsquo": 8216, + "rsquo": 8217, + "sbquo": 8218, + "ldquo": 8220, + "rdquo": 8221, + "bdquo": 8222, + "dagger": 8224, + "Dagger": 8225, + "permil": 8240, + "lsaquo": 8249, + "rsaquo": 8250, + "euro": 8364, + "nbsp": 160, + "iexcl": 161, + "cent": 162, + "pound": 163, 
+ "curren": 164, + "yen": 165, + "brvbar": 166, + "sect": 167, + "uml": 168, + "copy": 169, + "ordf": 170, + "laquo": 171, + "not": 172, + "shy": 173, + "reg": 174, + "macr": 175, + "deg": 176, + "plusmn": 177, + "sup2": 178, + "sup3": 179, + "acute": 180, + "micro": 181, + "para": 182, + "middot": 183, + "cedil": 184, + "sup1": 185, + "ordm": 186, + "raquo": 187, + "frac14": 188, + "frac12": 189, + "frac34": 190, + "iquest": 191, + "Agrave": 192, + "Aacute": 193, + "Acirc": 194, + "Atilde": 195, + "Auml": 196, + "Aring": 197, + "AElig": 198, + "Ccedil": 199, + "Egrave": 200, + "Eacute": 201, + "Ecirc": 202, + "Euml": 203, + "Igrave": 204, + "Iacute": 205, + "Icirc": 206, + "Iuml": 207, + "ETH": 208, + "Ntilde": 209, + "Ograve": 210, + "Oacute": 211, + "Ocirc": 212, + "Otilde": 213, + "Ouml": 214, + "times": 215, + "Oslash": 216, + "Ugrave": 217, + "Uacute": 218, + "Ucirc": 219, + "Uuml": 220, + "Yacute": 221, + "THORN": 222, + "szlig": 223, + "agrave": 224, + "aacute": 225, + "acirc": 226, + "atilde": 227, + "auml": 228, + "aring": 229, + "aelig": 230, + "ccedil": 231, + "egrave": 232, + "eacute": 233, + "ecirc": 234, + "euml": 235, + "igrave": 236, + "iacute": 237, + "icirc": 238, + "iuml": 239, + "eth": 240, + "ntilde": 241, + "ograve": 242, + "oacute": 243, + "ocirc": 244, + "otilde": 245, + "ouml": 246, + "divide": 247, + "oslash": 248, + "ugrave": 249, + "uacute": 250, + "ucirc": 251, + "uuml": 252, + "yacute": 253, + "thorn": 254, + "yuml": 255, + "fnof": 402, + "Alpha": 913, + "Beta": 914, + "Gamma": 915, + "Delta": 916, + "Epsilon": 917, + "Zeta": 918, + "Eta": 919, + "Theta": 920, + "Iota": 921, + "Kappa": 922, + "Lambda": 923, + "Mu": 924, + "Nu": 925, + "Xi": 926, + "Omicron": 927, + "Pi": 928, + "Rho": 929, + "Sigma": 931, + "Tau": 932, + "Upsilon": 933, + "Phi": 934, + "Chi": 935, + "Psi": 936, + "Omega": 937, + "alpha": 945, + "beta": 946, + "gamma": 947, + "delta": 948, + "epsilon": 949, + "zeta": 950, + "eta": 951, + "theta": 952, + "iota": 953, 
+ "kappa": 954, + "lambda": 955, + "mu": 956, + "nu": 957, + "xi": 958, + "omicron": 959, + "pi": 960, + "rho": 961, + "sigmaf": 962, + "sigma": 963, + "tau": 964, + "upsilon": 965, + "phi": 966, + "chi": 967, + "psi": 968, + "omega": 969, + "thetasym": 977, + "upsih": 978, + "piv": 982, + "bull": 8226, + "hellip": 8230, + "prime": 8242, + "Prime": 8243, + "oline": 8254, + "frasl": 8260, + "weierp": 8472, + "image": 8465, + "real": 8476, + "trade": 8482, + "alefsym": 8501, + "larr": 8592, + "uarr": 8593, + "rarr": 8594, + "darr": 8595, + "harr": 8596, + "crarr": 8629, + "lArr": 8656, + "uArr": 8657, + "rArr": 8658, + "dArr": 8659, + "hArr": 8660, + "forall": 8704, + "part": 8706, + "exist": 8707, + "empty": 8709, + "nabla": 8711, + "isin": 8712, + "notin": 8713, + "ni": 8715, + "prod": 8719, + "sum": 8721, + "minus": 8722, + "lowast": 8727, + "radic": 8730, + "prop": 8733, + "infin": 8734, + "ang": 8736, + "and": 8743, + "or": 8744, + "cap": 8745, + "cup": 8746, + "int": 8747, + "there4": 8756, + "sim": 8764, + "cong": 8773, + "asymp": 8776, + "ne": 8800, + "equiv": 8801, + "le": 8804, + "ge": 8805, + "sub": 8834, + "sup": 8835, + "nsub": 8836, + "sube": 8838, + "supe": 8839, + "oplus": 8853, + "otimes": 8855, + "perp": 8869, + "sdot": 8901, + "lceil": 8968, + "rceil": 8969, + "lfloor": 8970, + "rfloor": 8971, + "lang": 9001, + "rang": 9002, + "loz": 9674, + "spades": 9824, + "clubs": 9827, + "hearts": 9829, + "diams": 9830 + ]; +} diff -r 2deb4c1f0d93 -r c658172ca8a0 basic/Messages.d --- a/basic/Messages.d Sun May 25 14:56:05 2008 +0200 +++ b/basic/Messages.d Sun May 25 15:42:44 2008 +0200 @@ -34,8 +34,16 @@ InvalidStrEscape, InvalidUtf8Hex, InvalidHexStrChar, + InvalidCharEntity, + NoCharEntityEnd, StringShortEscape, StringHexInvalid, + InvalidStartInteger, + IntegerToLarge, + FloatingToLarge, + FloatingInvalidEnd, + FloatingBadLocation, + FloatingDotInE, } enum MessageType @@ -85,9 +93,17 @@ InvalidStrPrefix : E(Err, "Invalid string literal prefix"), 
InvalidStrEscape : E(Err, "Invalid escape sequence"), InvalidUtf8Hex : E(Err, "Invalid Utf8 hex char"), + NoCharEntityEnd : E(Err, "Character entity have no end, insert ';'"), + InvalidCharEntity : E(Err, "Invalid character entity"), InvalidHexStrChar : E(Err, "Invalid character in hex string"), StringShortEscape : E(Err, "String literal is to short for escape sequence"), - StringHexInvalid : E(Err, "Hex escape sequence have invalid digit at position %0 of %1") + StringHexInvalid : E(Err, "Hex escape sequence have invalid digit at position %0 of %1"), + InvalidStartInteger : E(Err, "Invalid begining of number"), + IntegerToLarge : E(Err, "Integer is to large. Max size is 18446744073709551615"), + FloatingToLarge : E(Err, "Floating literal is to large"), + FloatingInvalidEnd : E(Err, "Floating literal have wrong ending"), + FloatingBadLocation : E(Err, "Bad location for '%0' in floting literal"), + FloatingDotInE : E(Err, "There cannot be a dot in the exponent of a floating literal") /* NOTE(review): several of the added message texts contain spelling slips ("begining", "floting", "to large") */ ]; } diff -r 2deb4c1f0d93 -r c658172ca8a0 lexer/Lexer.d --- a/lexer/Lexer.d Sun May 25 14:56:05 2008 +0200 +++ b/lexer/Lexer.d Sun May 25 15:42:44 2008 +0200 @@ -289,7 +289,15 @@ { ++position; if (source[position-1] == '"' ) + { /* a directly following 'c', 'w' or 'd' is consumed as part of the string token */ + if(getNextChar != CharType.EOF) + if (source[position] == 'c' || + source[position] == 'w' || + source[position] == 'd') + position++; + return Token(Tok.String, Loc(start), position - start); + } else if (source[position-1] == '\\') position++; } @@ -310,12 +318,11 @@ Token lexNumber () { - bool sign = false; - bool dot = false; - bool e = false; + bool sign; int i = 0; + bool end = false; while(!end) { @@ -326,11 +333,15 @@ case CharType.Symbol: if(this.source[position+i] == '.') { - if(dot) - messages.report(OnlyOneDotFloating, Loc(position + i)); - dot = true; break; } + if (this.source[position+i] == '+' || + this.source[position+i] == '-') + { + if (source[position+i-1] == 'e' || + source[position+i-1] == 'E') + break; + } end = true; continue; case 
CharType.Letter: @@ -339,9 +350,6 @@ if (this.source[position+i] == 'e' || this.source[position+i] == 'E') { - if (e) - messages.report(OnlyOneEFloating, Loc(position + i)); - e = true; break; } end = true; @@ -354,6 +362,13 @@ i++; } + while(source[position+i] == 'u' || + source[position+i] == 'U' || + source[position+i] == 'L') + i += 1; /* also takes any trailing 'u', 'U' or 'L' characters into the token. NOTE(review): source[position+i] is indexed with no end-of-source check visible in this hunk; confirm. */ + + + position += i; return Token(Tok.Integer, Loc(position - i), i); diff -r 2deb4c1f0d93 -r c658172ca8a0 sema/LiteralInterpreter.d --- a/sema/LiteralInterpreter.d Sun May 25 14:56:05 2008 +0200 +++ b/sema/LiteralInterpreter.d Sun May 25 15:42:44 2008 +0200 @@ -23,5 +23,10 @@ auto type = parseString(exp.str, exp.loc, messages); } + void visitIntegerLit(IntegerLit exp) /* runs parseNumber on the literal text; the returned Number is only stored in a local */ + { + auto type = parseNumber(exp.name, exp.loc, messages); + } + MessageHandler messages; } diff -r 2deb4c1f0d93 -r c658172ca8a0 tests/parser/float_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/parser/float_1.d Sun May 25 15:42:44 2008 +0200 @@ -0,0 +1,8 @@ + + +void main() +{ + float f1 = 4_.5_e5+4; + float f2 = 4._5_e+344; + float f3 = 4.__5_e-_2; +} diff -r 2deb4c1f0d93 -r c658172ca8a0 tests/parser/int_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/parser/int_1.d Sun May 25 15:42:44 2008 +0200 @@ -0,0 +1,21 @@ + +int main() +{ + int i1 = 123_456; + int i2 = 1_2_3_4_5_6_; + + int i3 = 43_422_253; + long i4 = 34_322_523_123; + + long i5 = 43_422_253L; + long i6 = 34_322_523_123L; + + uint i7 = 43_422_253u; + ulong i8 = 18_446_744_073_709_551_615U; + + ulong i9 = 0UL; + ulong i10 = 18_446_744_073_709_551_615LU; + + ulong i10 = 18_446_744_073_709_551_615_23LU; + +} diff -r 2deb4c1f0d93 -r c658172ca8a0 tests/parser/string_1.d --- a/tests/parser/string_1.d Sun May 25 14:56:05 2008 +0200 +++ b/tests/parser/string_1.d Sun May 25 15:42:44 2008 +0200 @@ -29,6 +29,13 @@ char[6] s15 = x"61 62 63 64 65 66 67 68"; + char[4] s16 = "\®\&"; + + char[4] s16 = "\®\&"c; + wchar[2] s16 = "\®\&"w; + dchar[2] s16 = "\®\&"d; + + return 0; }