# HG changeset patch # User Aziz Köksal # Date 1204321884 -3600 # Node ID cf2ad5df025c25e77bfa0d096c3826d9d62db7c1 # Parent f7688996bf0834c842bcdac0117f0c0e59f26ba8 Added documentation comments. Removed Lexer.loadKeywords() and revised Lexer.isReservedIdentifier(). Also removed Lexer.getTokens(). Renamed keywords to g_reservedIds. Renamed classNames to g_classNames. Added PRE and DMDBUG macros. diff -r f7688996bf08 -r cf2ad5df025c trunk/src/cmd/ASTStats.d --- a/trunk/src/cmd/ASTStats.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/cmd/ASTStats.d Fri Feb 29 22:51:24 2008 +0100 @@ -19,7 +19,7 @@ /// Starts counting. uint[] count(Node root) { - table = new uint[classNames.length]; + table = new uint[g_classNames.length]; super.visitN(root); return table; } diff -r f7688996bf08 -r cf2ad5df025c trunk/src/cmd/Statistics.d --- a/trunk/src/cmd/Statistics.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/cmd/Statistics.d Fri Feb 29 22:51:24 2008 +0100 @@ -32,7 +32,7 @@ if (allocateTokensTable) s.tokensTable = new uint[TOK.MAX]; if (allocateNodesTable) - s.nodesTable = new uint[classNames.length]; + s.nodesTable = new uint[g_classNames.length]; return s; } @@ -129,7 +129,7 @@ Stdout.formatln(" {,10} | {}", "Count", "Node kind"); Stdout("-----------------------------").newline; foreach (i, count; total.nodesTable) - Stdout.formatln(" {,10} | {}", count, classNames[i]); + Stdout.formatln(" {,10} | {}", count, g_classNames[i]); Stdout("// End of nodes table.").newline; } } diff -r f7688996bf08 -r cf2ad5df025c trunk/src/dil/ast/DefaultVisitor.d --- a/trunk/src/dil/ast/DefaultVisitor.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/dil/ast/DefaultVisitor.d Fri Feb 29 22:51:24 2008 +0100 @@ -358,7 +358,7 @@ char[] generateDefaultVisitMethods() { char[] text; - foreach (className; classNames) + foreach (className; g_classNames) text ~= "private mixin .visitDefault!("~className~") _"~className~";\n" "override returnType!(\""~className~"\") visit("~className~" node)" "{return 
_"~className~".visitDefault(node);}\n"; diff -r f7688996bf08 -r cf2ad5df025c trunk/src/dil/ast/NodesEnum.d --- a/trunk/src/dil/ast/NodesEnum.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/dil/ast/NodesEnum.d Fri Feb 29 22:51:24 2008 +0100 @@ -16,7 +16,7 @@ } /// A list of all class names that inherit from Node. -static const char[][] classNames = [ +static const char[][] g_classNames = [ // Declarations: "CompoundDeclaration", "EmptyDeclaration", @@ -216,7 +216,7 @@ char[] generateNodeKindMembers() { char[] text; - foreach (className; classNames) + foreach (className; g_classNames) text ~= className ~ ","; return text; } diff -r f7688996bf08 -r cf2ad5df025c trunk/src/dil/ast/Visitor.d --- a/trunk/src/dil/ast/Visitor.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/dil/ast/Visitor.d Fri Feb 29 22:51:24 2008 +0100 @@ -21,7 +21,7 @@ char[] generateVisitMethods() { char[] text; - foreach (className; classNames) + foreach (className; g_classNames) text ~= "returnType!(\""~className~"\") visit("~className~" node){return node;}\n"; return text; } @@ -63,7 +63,7 @@ char[] generateDispatchFunctions() { char[] text; - foreach (className; classNames) + foreach (className; g_classNames) text ~= "returnType!(\""~className~"\") visit"~className~"(Visitor visitor, "~className~" c)\n" "{ return visitor.visit(c); }\n"; return text; @@ -83,7 +83,7 @@ char[] generateVTable() { char[] text = "["; - foreach (className; classNames) + foreach (className; g_classNames) text ~= "cast(void*)&visit"~className~",\n"; return text[0..$-2]~"]"; // slice away last ",\n" } @@ -102,7 +102,7 @@ /// The table holding function pointers to the second dispatch functions. 
static const void*[] dispatch_vtable = mixin(generateVTable()); - static assert(dispatch_vtable.length == classNames.length, "vtable length doesn't match number of classes"); + static assert(dispatch_vtable.length == g_classNames.length, "vtable length doesn't match number of classes"); /// Looks up the second dispatch function for n and returns that. Node function(Visitor, Node) getDispatchFunction()(Node n) diff -r f7688996bf08 -r cf2ad5df025c trunk/src/dil/lexer/IdTable.d --- a/trunk/src/dil/lexer/IdTable.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/dil/lexer/IdTable.d Fri Feb 29 22:51:24 2008 +0100 @@ -38,7 +38,7 @@ /// Loads keywords and predefined identifiers into the static table. static this() { - foreach (ref k; keywords) + foreach (ref k; g_reservedIds) staticTable[k.str] = &k; foreach (id; Ident.allIds()) staticTable[id.str] = id; diff -r f7688996bf08 -r cf2ad5df025c trunk/src/dil/lexer/Keywords.d --- a/trunk/src/dil/lexer/Keywords.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/dil/lexer/Keywords.d Fri Feb 29 22:51:24 2008 +0100 @@ -8,7 +8,7 @@ import dil.lexer.Identifier; /// Table of reserved identifiers. -static const Identifier[] keywords = [ +static const Identifier[] g_reservedIds = [ {"abstract", TOK.Abstract}, {"alias", TOK.Alias}, {"align", TOK.Align}, diff -r f7688996bf08 -r cf2ad5df025c trunk/src/dil/lexer/Lexer.d --- a/trunk/src/dil/lexer/Lexer.d Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/dil/lexer/Lexer.d Fri Feb 29 22:51:24 2008 +0100 @@ -43,6 +43,7 @@ uint lineNum = 1; /// Current, actual source text line number. uint lineNum_hline; /// Line number set by #line. uint inTokenString; /// > 0 if inside q{ } + /// Holds the original file path and the modified one (by #line.) NewlineData.FilePaths* filePaths; /// Construct a Lexer object. @@ -81,6 +82,7 @@ scanShebang(); } + /// The destructor deletes the doubly-linked token list. ~this() { auto token = head.next; @@ -117,6 +119,7 @@ } } + /// Sets the value of the special token. 
void finalizeSpecialToken(ref Token t) { assert(t.srcText[0..2] == "__"); @@ -174,6 +177,9 @@ this.lineBegin = p; } + /// Scans the next token in the source text. + /// + /// Creates a new token if t.next is null and appends it to the list. private void scanNext(ref Token* t) { assert(t !is null); @@ -218,10 +224,9 @@ return false; } - /++ - This is the old scan method. - TODO: profile old and new to see which one is faster. - +/ + /// The main method which recognizes the characters that make up a token. + /// + /// Complicated tokens are scanned in separate methods. public void scan(ref Token t) in { @@ -641,6 +646,7 @@ } } + /// Converts a string literal to an integer. template toUint(char[] T) { static assert(0 < T.length && T.length <= 4); @@ -651,35 +657,45 @@ } static assert(toUint!("\xAA\xBB\xCC\xDD") == 0xAABBCCDD); - // Can't use this yet due to a bug in DMD (bug id=1534). - template case_(char[] str, TOK tok, char[] label) + /// Constructs case statements. E.g.: + /// --- + /// // case_!("<", "Less", "Lcommon") -> + /// case 60u: + /// t.kind = TOK.Less; + /// goto Lcommon; + /// --- + /// Note: Can't use this yet due to a $(DMDBUG 1534, bug) in DMD. 
+ template case_(char[] str, char[] kind, char[] label) { const char[] case_ = - `case `~toUint!(str).stringof~`: - - goto `~label~`;`; + `case `~toUint!(str).stringof~`:` + `t.kind = TOK.`~kind~`;` + `goto `~label~`;`; } + //pragma(msg, case_!("<", "Less", "Lcommon")); - template case_L4(char[] str, TOK tok) + template case_L4(char[] str, TOK kind) { - const char[] case_L4 = case_!(str, tok, "Lcommon_4"); + const char[] case_L4 = case_!(str, kind, "Lcommon_4"); } - template case_L3(char[] str, TOK tok) + template case_L3(char[] str, TOK kind) { - const char[] case_L3 = case_!(str, tok, "Lcommon_3"); + const char[] case_L3 = case_!(str, kind, "Lcommon_3"); } - template case_L2(char[] str, TOK tok) + template case_L2(char[] str, TOK kind) { - const char[] case_L2 = case_!(str, tok, "Lcommon_2"); + const char[] case_L2 = case_!(str, kind, "Lcommon_2"); } - template case_L1(char[] str, TOK tok) + template case_L1(char[] str, TOK kind) { - const char[] case_L3 = case_!(str, tok, "Lcommon"); + const char[] case_L3 = case_!(str, kind, "Lcommon"); } + /// An alternative scan method. + /// Profiling shows it's a bit slower. public void scan_(ref Token t) in { @@ -1095,6 +1111,9 @@ return; } + /// Scans a block comment. + /// + /// BlockComment := "/*" AnyChar* "*/" void scanBlockComment(ref Token t) { assert(p[-1] == '/' && *p == '*'); @@ -1137,6 +1156,9 @@ return; } + /// Scans a nested comment. + /// + /// NestedComment := "/+" (AnyChar* | NestedComment) "+/" void scanNestedComment(ref Token t) { assert(p[-1] == '/' && *p == '+'); @@ -1187,6 +1209,9 @@ return; } + /// Scans the postfix character of a string literal. + /// + /// PostfixChar := "c" | "w" | "d" char scanPostfix() { assert(p[-1] == '"' || p[-1] == '`' || @@ -1205,6 +1230,9 @@ assert(0); } + /// Scans a normal string literal. + /// + /// NormalStringLiteral := "\"" Char* "\"" void scanNormalStringLiteral(ref Token t) { assert(*p == '"'); @@ -1262,6 +1290,9 @@ assert(0); } + /// Scans a character literal. 
+ /// + /// CharLiteral := "'" Char "'" void scanCharacterLiteral(ref Token t) { assert(*p == '\''); @@ -1293,6 +1324,9 @@ t.end = p; } + /// Scans a raw string literal. + /// + /// RawStringLiteral := "r\"" AnyChar* "\"" | "`" AnyChar* "`" void scanRawStringLiteral(ref Token t) { assert(*p == '`' || *p == '"' && p[-1] == 'r'); @@ -1348,6 +1382,9 @@ assert(0); } + /// Scans a hexadecimal string literal. + /// + /// HexStringLiteral := "x\"" (HexChar HexChar)* "\"" void scanHexStringLiteral(ref Token t) { assert(p[0] == 'x' && p[1] == '"'); @@ -1430,6 +1467,16 @@ assert(0); } +version(DDoc) +{ + /// Scans a delimited string literal. + void scanDelimitedStringLiteral(ref Token t); + /// Scans a token string literal. + /// + /// TokenStringLiteral := "q{" Token* "}" + void scanTokenStringLiteral(ref Token t); +} +else version(D2) { void scanDelimitedStringLiteral(ref Token t) @@ -1713,6 +1760,15 @@ } } // version(D2) + /// Scans an escape sequence. + /// + /// EscapeSequence := "\" (Octal{1,3} | ("x" Hex{2}) | + /// ("u" Hex{4}) | ("U" Hex{8}) | + /// "'" | "\"" | "\\" | "?" | "a" | + /// "b" | "f" | "n" | "r" | "t" | "v") + /// Params: + /// isBinary = set to true for octal and hexadecimal escapes. + /// Returns: the escape value. dchar scanEscapeSequence(ref bool isBinary) out(result) { assert(isValidChar(result)); } @@ -1837,17 +1893,17 @@ return REPLACEMENT_CHAR; // Error: return replacement character. } - /* - IntegerLiteral:= (Dec|Hex|Bin|Oct)Suffix? - Dec:= (0|[1-9][0-9_]*) - Hex:= 0[xX] HexDigits - Bin:= 0[bB][01_]+ - Oct:= 0[0-7_]+ - Suffix:= (L[uU]?|[uU]L?) - HexDigits:= [0-9a-zA-Z_]+ - - Invalid: "0b_", "0x_", "._" - */ + /// Scans a number literal. + /// + /// $(PRE + /// IntegerLiteral := (Dec|Hex|Bin|Oct)Suffix? + /// Dec := (0|[1-9][0-9_]*) + /// Hex := 0[xX][_]*[0-9a-zA-Z][0-9a-zA-Z_]* + /// Bin := 0[bB][_]*[01][01_]* + /// Oct := 0[0-7_]* + /// Suffix := (L[uU]?|[uU]L?) + /// ) + /// Invalid: "0b_", "0x_", "._" etc. 
void scanNumber(ref Token t) { ulong ulong_; @@ -2135,14 +2191,19 @@ return; } - /* - FloatLiteral:= Float[fFL]?i? - Float:= DecFloat | HexFloat - DecFloat:= ([0-9][0-9_]*[.][0-9_]*DecExponent?) | [.][0-9][0-9_]*DecExponent? | [0-9][0-9_]*DecExponent - DecExponent:= [eE][+-]?[0-9][0-9_]* - HexFloat:= 0[xX](HexDigits[.]HexDigits | [.][0-9a-zA-Z]HexDigits? | HexDigits)HexExponent - HexExponent:= [pP][+-]?[0-9][0-9_]* - */ + /// Scans a floating point number literal. + /// + /// $(PRE + /// FloatLiteral := Float[fFL]?i? + /// Float := DecFloat | HexFloat + /// DecFloat := ([0-9][0-9_]*[.][0-9_]*DecExponent?) | + /// [.][0-9][0-9_]*DecExponent? | [0-9][0-9_]*DecExponent + /// DecExponent := [eE][+-]?[0-9][0-9_]* + /// HexFloat := 0[xX](HexDigits[.]HexDigits | + /// [.][0-9a-zA-Z]HexDigits? | + /// HexDigits)HexExponent + /// HexExponent := [pP][+-]?[0-9][0-9_]* + /// ) void scanReal(ref Token t) { if (*p == '.') @@ -2192,6 +2253,7 @@ finalizeFloat(t, buffer); } + /// Scans a hexadecimal floating point number literal. void scanHexReal(ref Token t) { assert(*p == '.' || *p == 'p' || *p == 'P'); @@ -2233,6 +2295,10 @@ error(t.start, mid); } + /// Sets the value of the token. + /// Params: + /// t = receives the value. + /// buffer = the well-formed float number. void finalizeFloat(ref Token t, string buffer) { assert(buffer[$-1] == 0); @@ -2266,7 +2332,9 @@ t.end = p; } - /// Scan special token: #line Integer [Filespec] EndOfLine + /// Scans a special token sequence. + /// + /// SpecialTokenSequence := "#line" Integer Filespec? EndOfLine void scanSpecialTokenSequence(ref Token t) { assert(*p == '#'); @@ -2378,12 +2446,10 @@ error(errorAtColumn, mid); } - /++ - Insert an empty dummy token before t. - - Useful in the parsing phase for representing a node in the AST - that doesn't consume an actual token from the source text. - +/ + /// Inserts an empty dummy token (TOK.Empty) before t. 
+ /// + /// Useful in the parsing phase for representing a node in the AST + /// that doesn't consume an actual token from the source text. Token* insertEmptyTokenBefore(Token* t) { assert(t !is null && t.prev !is null); @@ -2402,21 +2468,30 @@ return new_t; } + /// Returns the error line number. uint errorLineNumber(uint lineNum) { return lineNum - this.lineNum_hline; } + /// Forwards error parameters. void error(char* columnPos, MID mid, ...) { error_(this.lineNum, this.lineBegin, columnPos, mid, _arguments, _argptr); } + /// Forwards error parameters. void error(uint lineNum, char* lineBegin, char* columnPos, MID mid, ...) { error_(lineNum, lineBegin, columnPos, mid, _arguments, _argptr); } + /// Creates an error report and appends it to a list. + /// Params: + /// lineNum = the line number. + /// lineBegin = points to the first character of the current line. + /// columnPos = points to the character where the error is located. + /// mid = the message ID. void error_(uint lineNum, char* lineBegin, char* columnPos, MID mid, TypeInfo[] _arguments, Arg _argptr) { @@ -2430,32 +2505,21 @@ infoMan ~= error; } - Token* getTokens() - { - while (nextToken() != TOK.EOF) - {} - return head; - } - - /// Scan the whole text until EOF is encountered. + /// Scans the whole source text until EOF is encountered. void scanAll() { while (nextToken() != TOK.EOF) {} } - /// HEAD -> Newline -> First Token + /// Returns the first token of the source text. + /// This can be the EOF token. + /// Structure: HEAD -> Newline -> First Token Token* firstToken() { return this.head.next.next; } - static void loadKeywords(ref Identifier[string] table) - { - foreach(k; keywords) - table[k.str] = k; - } - /// Returns true if str is a valid D identifier. static bool isIdentifierString(char[] str) { @@ -2474,24 +2538,20 @@ /// Returns true if str is a keyword or a special token (__FILE__, __LINE__ etc.) 
static bool isReservedIdentifier(char[] str) { - if (str.length == 0) - return false; + if (!isIdentifierString(str)) + return false; // str is not a valid identifier. - static Identifier[string] reserved_ids_table; - if (reserved_ids_table is null) - Lexer.loadKeywords(reserved_ids_table); + auto id = IdTable.inStatic(str); + if (id is null || id.kind == TOK.Identifier) + return false; // str is not in the table or a normal identifier. - if (!isIdentifierString(str)) - return false; - - return (str in reserved_ids_table) !is null; + return true; } - /++ - Returns true if the current character to be decoded is - a Unicode alpha character. - The current pointer 'p' is not advanced if false is returned. - +/ + /// Returns true if the current character to be decoded is + /// a Unicode alpha character. + /// + /// The current pointer 'p' is not advanced if false is returned. bool isUnicodeAlpha() { assert(!isascii(*p), "check for ASCII char before calling decodeUTF8()."); @@ -2628,6 +2688,7 @@ return d; } + /// Encodes the character d and appends it to str. static void encodeUTF8(ref char[] str, dchar d) { assert(!isascii(d), "check for ASCII char before calling encodeUTF8()."); @@ -2714,6 +2775,7 @@ } } +/// Tests the lexer with a list of tokens. unittest { Stdout("Testing Lexer.\n"); @@ -2791,6 +2853,7 @@ } while (token.kind != TOK.EOF) } +/// Tests the Lexer's peek() method. unittest { Stdout("Testing method Lexer.peek()\n"); diff -r f7688996bf08 -r cf2ad5df025c trunk/src/macros_dil.ddoc --- a/trunk/src/macros_dil.ddoc Fri Feb 29 19:25:21 2008 +0100 +++ b/trunk/src/macros_dil.ddoc Fri Feb 29 22:51:24 2008 +0100 @@ -17,4 +17,6 @@ SYMBOL_ = $1 SYMBOL = $1 -DDD = --- \ No newline at end of file +PRE =
<pre>$0</pre>
+DDD = --- +DMDBUG = $(LINK2 http://d.puremagic.com/issues/show_bug.cgi?id=$1, $2)