Mercurial > projects > dil

--- a/trunk/src/cmd/Generate.d	Mon Nov 26 20:18:54 2007 +0100
+++ b/trunk/src/cmd/Generate.d	Fri Nov 30 20:17:29 2007 +0100
@@ -78,7 +78,11 @@
   switch (n.category)
   {
   alias NodeCategory NC;
-  case NC.Declaration: remove = "Declaration"; break;
+  case NC.Declaration:
+    if (n.kind == NodeKind.Declarations)
+      return name;
+    remove = "Declaration";
+    break;
   case NC.Statement:
     if (n.kind == NodeKind.Statements)
       return name;
@@ -504,7 +508,7 @@
         print(start[0 .. end - start]);
     }
     print(tags[DP.HLineBegin]);
-    auto num = token.line_num;
+    auto num = token.tokLineNum;
     if (num is null)
     {
       print(token.srcText);
@@ -515,9 +519,9 @@
     auto ptr = token.start;
     printWS(ptr, num.start); // prints "#line" as well
     printToken(num, tags, print);
-    if (token.line_filespec)
+    if (token.tokLineFilespec)
     {
-      auto filespec = token.line_filespec;
+      auto filespec = token.tokLineFilespec;
       // Print whitespace between number and filespec
       printWS(num.end, filespec.start);
       print.format(tags[DP.Filespec], xml_escape(filespec.srcText));
--- a/trunk/src/cmd/Statistics.d	Mon Nov 26 20:18:54 2007 +0100
+++ b/trunk/src/cmd/Statistics.d	Fri Nov 30 20:17:29 2007 +0100
@@ -92,7 +92,7 @@

   Statistics stats;

-  stats.linesOfCode = lx.loc;
+  stats.linesOfCode = lx.lineNum;
   // Traverse linked list.
   while (token.type != TOK.EOF)
   {
--- a/trunk/src/dil/Lexer.d	Mon Nov 26 20:18:54 2007 +0100
+++ b/trunk/src/dil/Lexer.d	Fri Nov 30 20:17:29 2007 +0100
@@ -20,9 +20,9 @@

 const char[3] LS = \u2028; /// Line separator.
 const char[3] PS = \u2029; /// Paragraph separator.
-
 const dchar LSd = 0x2028;
 const dchar PSd = 0x2029;
+static assert(LS[0] == PS[0] && LS[1] == PS[1]);

 /// U+FFFD = �. Used to replace invalid Unicode characters.
 const dchar REPLACEMENT_CHAR = '\uFFFD';
@@ -31,31 +31,35 @@

 class Lexer
 {
-  Token* head; /// The head of the doubly linked token list.
-  Token* tail; /// The tail of the linked list. Set in scan().
-  Token* token; /// Points to the current token in the token list.
-  string text; /// The source text.
-  char[] filePath; /// Path to the source file.
-  char* p; /// Points to the current character in the source text.
-  char* end; /// Points one character past the end of the source text.
+  Token* head;      /// The head of the doubly linked token list.
+  Token* tail;      /// The tail of the linked list. Set in scan().
+  Token* token;     /// Points to the current token in the token list.
+  string text;      /// The source text.
+  char[] filePath;  /// Path to the source text.
+  char* p;          /// Points to the current character in the source text.
+  char* end;        /// Points one character past the end of the source text.

   // Members used for error messages:
   Information[] errors;
-  char* lineBegin; /// Always points to the beginning of the current line.
-  uint loc = 1; /// Actual line of code.
-  uint loc_hline; /// Line number set by #line.
+  /// Always points to the beginning of the current line.
+  char* lineBegin;
+//   Token* newline;     /// Current newline token.
+  uint lineNum = 1;   /// Current, actual source text line number.
+  uint lineNum_hline; /// Line number set by #line.
   uint inTokenString; /// > 0 if inside q{ }
-  Location errorLoc;
+  char[] errorPath;   /// The path displayed in error messages.

   Identifier[string] idtable;

-  version(token2LocTable)
-    /// Maps every token that starts a new line to a Location.
-    Location[Token*] token2LocTable;
-
+  /++
+    Construct a Lexer object.
+    Params:
+      text     = the UTF-8 source code.
+      filePath = the path to the source code; used for error messages.
+  +/
   this(string text, string filePath)
   {
-    this.filePath = filePath;
+    this.filePath = this.errorPath = filePath;

     this.text = text;
     if (text.length == 0 || text[$-1] != 0)
@@ -67,20 +71,25 @@
     this.p = this.text.ptr;
     this.end = this.p + this.text.length;
     this.lineBegin = this.p;
-    this.errorLoc = new Location(filePath, 1, this.lineBegin, this.lineBegin);
     loadKeywords(this.idtable);

     this.head = new Token;
     this.head.type = TOK.HEAD;
+    this.head.start = this.head.end = this.p;
     this.token = this.head;
+    // Add a newline as the first token after the head.
+    auto newline = new Token;
+    newline.type = TOK.Newline;
+    newline.start = newline.end = this.p;
+    newline.filePath = this.errorPath;
+    newline.lineNum = 1;
+    newline.lineNum_hline = 0;
+    // Link in.
+    this.token.next = newline;
+    newline.prev = this.token;
+    this.token = newline;
+//     this.newline = newline;
     scanShebang();
-  version(token2LocTable)
-  {
-    // Add first token to table.
-    auto firstToken = this.head;
-    peek(firstToken);
-    token2LocTable[firstToken] = new Location(1, null);
-  }
   }

   ~this()
@@ -95,36 +104,23 @@
     delete tail;
   }

+  /++
+    Scans the optional "shebang" line, which may appear once at the beginning of a file, into a TOK.Shebang token linked after this.token.
+    Regexp: #![^\EndOfLine]*
+  +/
   void scanShebang()
   {
     if (*p == '#' && p[1] == '!')
     {
-      Token* t = new Token;
+      auto t = new Token;
+      t.type = TOK.Shebang;
       t.start = p;
-      t.type = TOK.Shebang;
       ++p;
-      assert(*p == '!');
-      while (1)
-      {
-        t.end = ++p;
-        switch (*p)
-        {
-        case '\r', '\n', 0, _Z_:
-          break;
-        case LS[0]:
-          if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-            break;
-        default:
-          if (*p & 128)
-            decodeUTF8();
-          continue;
-        }
-        break; // Exit loop.
-      }
-      // Reset p. The newline will be scanned as whitespace in scan().
-      p = t.end;
-      this.head.next = t;
-      t.prev = this.head;
+      while (!isEndOfLine(++p)) // The first ++p steps over the '!'; stop at a Newline, 0 or _Z_.
+        isascii(*p) || decodeUTF8(); // Non-ASCII byte: decode it (presumably moves p over the sequence - TODO confirm).
+      t.end = p; // p rests on the EndOfLine character, which is not part of the token.
+      this.token.next = t; // Append after the current token; this.token itself is not advanced here.
+      t.prev = this.token;
     }
   }

@@ -134,10 +130,10 @@
     switch (t.type)
     {
     case TOK.FILE:
-      t.str = this.errorLoc.filePath;
+      t.str = this.errorPath;
       break;
     case TOK.LINE:
-      t.uint_ = this.errorLineNum(this.loc);
+      t.uint_ = this.errorLineNumber(this.lineNum);
       break;
     case TOK.DATE,
          TOK.TIME,
@@ -169,24 +165,23 @@
     }
   }

-  void setLineBegin(char* p)
+  private void setLineBegin(char* p) // Records p as the start of the current line; p must directly follow a Newline.
   {
     // Check that we can look behind one character.
     assert((p-1) >= text.ptr && p < end);
     // Check that previous character is a newline.
-    assert(p[-1] == '\n' ||  p[-1] == '\r' ||
-           p[-1] == LS[2] || p[-1] == PS[2]);
+    assert(isNewlineEnd(p - 1)); // Checks the whole LS/PS byte sequence, not only its last byte.
     this.lineBegin = p;
   }

-  private void scanNext(bool rescan)(ref Token* t)
+  private void scanNext(ref Token* t)
   {
     assert(t !is null);
     if (t.next)
     {
       t = t.next;
-      static if (rescan == true)
-        rescanNewlines(*t);
+//       if (t.type == TOK.Newline)
+//         this.newline = t;
     }
     else if (t != this.tail)
     {
@@ -198,132 +193,124 @@
     }
   }

+  /// Advances t to the token that follows it; delegates to scanNext().
   void peek(ref Token* t)
   {
-    scanNext!(false)(t);
+    scanNext(t);
   }

+  /// Advances this.token to the next token in the source text and returns its type.
   TOK nextToken()
   {
-    scanNext!(true)(this.token);
+    scanNext(this.token);
     return this.token.type;
   }

-  void rescanNewlines(ref Token t)
+  /// Returns true if d is U+2028 (line separator) or U+2029 (paragraph separator).
+  static bool isUnicodeNewlineChar(dchar d)
   {
-    auto p = t.ws;
-    auto end = t.start;
+    return d == LSd || d == PSd;
+  }

-    if (p !is null)
-    {
-      assert(end !is null);
-      // Scan preceding whitespace for newlines.
-      do
-      {
-        switch (*p)
-        {
-        case '\r':
-          if (p[1] == '\n')
-            ++p;
-        case '\n':
-          ++loc;
-          setLineBegin(p + 1);
-          break;
-        case LS[0]:
-          assert(p+2 < end && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]));
-          ++p; ++p;
-          ++loc;
-          setLineBegin(p + 1);
-          break;
-        default:
-          assert(isspace(*p));
-        }
-        ++p;
-      } while (p < end)
-    }
+  /// Returns true if p points to the first byte of a UTF-8 encoded line (LS) or paragraph (PS) separator.
+  static bool isUnicodeNewline(char* p)
+  {
+    return *p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]); // LS and PS share their first two bytes.
+  }

-    if (t.type == TOK.String && t.start[0] != '\\' ||
-        t.type == TOK.Comment && t.start[1] != '/')
-    {
-      // String literals and comments are the only tokens that can have
-      // newlines.
-      p = t.start;
-      end = t.end;
-      assert(p !is null && end !is null);
-      do
-      {
-        switch (*p)
-        {
-        case '\r':
-          if (p[1] == '\n')
-            ++p;
-        case '\n':
-          ++loc;
-          setLineBegin(p + 1);
-          break;
-        case LS[0]:
-          if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-          {
-            ++p; ++p;
-            ++loc;
-            setLineBegin(p + 1);
-            break;
-          }
-        default:
-        }
-        ++p;
-      } while (p < end)
-    }
-    else
-    {
-      if (t.type == TOK.HashLine)
-        evaluateHashLine(t);
+  /++
+    Returns true if p points to the start of a Newline. Only looks forward, so the '\n' of a "\r\n" pair also tests true.
+    Newline: \n | \r | \r\n | LS | PS
+  +/
+  static bool isNewline(char* p)
+  {
+    return *p == '\n' || *p == '\r' || isUnicodeNewline(p);
+  }

-      assert(delegate() {
-          p = t.start;
-          end = t.end;
-          while (p < end)
-          {
-            if (*p == '\n' || *p == '\r' ||
-                (p+2) < end && *p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-              return false;
-            ++p;
-          }
+  /// Returns true if p points to the last character of a Newline. Not static: the look-behind is bounded by this.text.
+  bool isNewlineEnd(char* p)
+  {
+    if (*p == '\n' || *p == '\r') // A '\r' counts as an end even when a '\n' follows it.
+      return true;
+    if (*p == LS[2] || *p == PS[2]) // Possibly the third byte of LS/PS; confirm by looking behind.
+      if ((p-2) >= text.ptr) // NOTE(review): bound is text.ptr, so this is only meaningful if p points into this.text.
+        if (p[-1] == LS[1] && p[-2] == LS[0])
           return true;
-        }() == true, "Token '" ~ t.srcText ~ "' has unexpected newline."
-      );
-    }
+    return false;
+  }
+
+  /++
+    Returns true if p points to the first character of an EndOfLine, i.e. a Newline or one of the two end-of-input characters.
+    EndOfLine: Newline | 0 | _Z_
+  +/
+  static bool isEndOfLine(char* p)
+  {
+    return isNewline(p) || *p == 0 || *p == _Z_; // 0 and _Z_ are reported as \EOF elsewhere in this lexer.
   }

-  struct LocState
+  /++
+    Scans a Newline at p and sets p one character past it; p is left unchanged when there is none.
+    Returns '\n' if scanned or 0 otherwise. "\r\n" is consumed as a single Newline.
+  +/
+  static dchar scanNewline(ref char* p)
   {
-    char[] filePath;
-    uint loc;
-    uint loc_hline;
-    char* lineBegin;
+    switch (*p)
+    {
+    case '\r':
+      if (p[1] == '\n')
+        ++p; // Step onto the '\n' of "\r\n"; falls through to consume it.
+    case '\n':
+      ++p;
+      return '\n';
+    default:
+      if (isUnicodeNewline(p))
+      {
+        ++p; ++p; ++p; // LS and PS are three UTF-8 bytes each.
+        return '\n';
+      }
+    }
+    return 0; // Not at a Newline.
   }

-  LocState getState()
+  /// Returns a Location for the given token, found by walking back to the nearest preceding Newline token and rescanning multiline tokens in between.
+  static Location getLocation(Token* token)
   {
-    LocState s;
-    s.filePath = this.errorLoc.filePath;
-    s.lineBegin = this.lineBegin;
-    s.loc_hline = this.loc_hline;
-    s.loc = this.loc;
-    return s;
+    auto search_t = token.prev; // NOTE(review): no null check; presumably never called with the head token - confirm.
+    // Find previous newline token.
+    while (search_t.type != TOK.Newline)
+      search_t = search_t.prev;
+    auto filePath  = search_t.filePath;
+    auto lineNum   = search_t.lineNum - search_t.lineNum_hline; // Line number recorded on the Newline minus its #line value.
+    auto lineBegin = search_t.end; // The line starts right after the Newline token.
+    // Determine actual line begin and line number by visiting every token between the Newline and the given token.
+    while (1)
+    {
+      search_t = search_t.next;
+      if (search_t == token)
+        break;
+      // Multiline tokens must be rescanned for newlines.
+      if (search_t.isMultiline)
+      {
+        auto p = search_t.start, end = search_t.end;
+        while (p != end)
+        {
+          if (Lexer.scanNewline(p) == '\n') // scanNewline() already moved p past the Newline.
+          {
+            lineBegin = p;
+            ++lineNum;
+          }
+          else
+          ++p; // Body of the else above (the indentation is misleading).
+        }
+      }
+    }
+    return new Location(filePath, lineNum, lineBegin, token.start);
   }

-  void restoreState(LocState s)
-  {
-    if (s.lineBegin == this.lineBegin)
-      return;
-    assert(s.loc != this.loc);
-    this.errorLoc.setFilePath(s.filePath);
-    this.lineBegin = s.lineBegin;
-    this.loc = s.loc;
-    this.loc_hline = s.loc_hline;
-  }
-
+  /++
+    This is the old scan method.
+    TODO: profile old and new to see which one is faster.
+  +/
   public void scan_(out Token t)
   in
   {
@@ -337,50 +324,43 @@
   body
   {
     // Scan whitespace.
-    auto pws = p;
-    auto old_loc = this.loc;
-    while (1)
+    if (isspace(*p))
     {
+      t.ws = p;
+      while (isspace(*++p))
+      {}
+    }
+
+    // Scan a token.
+    uint c = *p;
+    {
+      t.start = p;
+      // Newline.
       switch (*p)
       {
       case '\r':
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
+        assert(isNewlineEnd(p));
         ++p;
-        ++loc;
+        ++lineNum;
         setLineBegin(p);
-        continue;
-      case LS[0]:
-        if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
+//         this.newline = &t;
+        t.type = TOK.Newline;
+        t.filePath = this.errorPath;
+        t.lineNum = lineNum;
+        t.lineNum_hline = lineNum_hline;
+        t.end = p;
+        return;
+      default:
+        if (isUnicodeNewline(p))
         {
           ++p; ++p;
           goto case '\n';
         }
-        // goto default;
-      default:
-        if (!isspace(*p))
-          break;
-        ++p;
-        continue;
       }
-      break; // Exit loop.
-    }
-
-    if (p != pws)
-    {
-      t.ws = pws;
-      if (old_loc != this.loc)
-        version(token2LocTable)
-          token2LocTable[&t] = new Location(loc, null);
-    }
-
-    // Scan token.
-    uint c = *p;
-    {
-      t.start = p;
-
+      // Identifier or string literal.
       if (isidbeg(c))
       {
         if (c == 'r' && p[1] == '"' && ++p)
@@ -394,10 +374,11 @@
         if (c == 'q' && p[1] == '{')
           return scanTokenStringLiteral(t);
       }
+        // Scan identifier.
       Lidentifier:
         do
         { c = *++p; }
-        while (isident(c) || c & 128 && isUniAlpha(decodeUTF8()))
+        while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))

         t.end = p;

@@ -443,23 +424,8 @@
         case '*':
           return scanBlockComment(t);
         case '/':
-          while (1)
-          {
-            c = *++p;
-            switch (c)
-            {
-            case '\r', '\n', 0, _Z_:
-              break;
-            case LS[0]:
-              if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-                break;
-            default:
-              if (c & 128)
-                decodeUTF8();
-              continue;
-            }
-            break; // Exit loop.
-          }
+          while (!isEndOfLine(++p))
+            isascii(*p) || decodeUTF8();
           t.type = TOK.Comment;
           t.end = p;
           return;
@@ -483,7 +449,7 @@
         do
         {
           c = scanEscapeSequence();
-          if (c < 128)
+          if (isascii(c))
             buffer ~= c;
           else
             encodeUTF8(buffer, c);
@@ -749,7 +715,7 @@
         return;
       }

-      if (c & 128)
+      if (!isascii(c))
       {
         c = decodeUTF8();
         if (isUniAlpha(c))
@@ -818,48 +784,41 @@
   body
   {
     // Scan whitespace.
-    auto pws = p;
-    auto old_loc = this.loc;
-    while (1)
+    if (isspace(*p))
     {
-      switch (*p)
-      {
-      case '\r':
-        if (p[1] == '\n')
-          ++p;
-      case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        ++p;
-        ++loc;
-        setLineBegin(p);
-        continue;
-      case LS[0]:
-        if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-        {
-          ++p; ++p;
-          goto case '\n';
-        }
-        // goto default;
-      default:
-        if (!isspace(*p))
-          break;
-        ++p;
-        continue;
-      }
-      break; // Exit loop.
+      t.ws = p;
+      while (isspace(*++p))
+      {}
     }

-    if (p != pws)
+    // Scan a token.
+    t.start = p;
+    // Newline.
+    switch (*p)
     {
-      t.ws = pws;
-      if (old_loc != this.loc)
-        version(token2LocTable)
-          token2LocTable[&t] = new Location(loc, null);
+    case '\r':
+      if (p[1] == '\n')
+        ++p;
+    case '\n':
+      assert(isNewlineEnd(p));
+      ++p;
+      ++lineNum;
+      setLineBegin(p);
+//       this.newline = &t;
+      t.type = TOK.Newline;
+      t.filePath = this.errorPath;
+      t.lineNum = lineNum;
+      t.lineNum_hline = lineNum_hline;
+      t.end = p;
+      return;
+    default:
+      if (isUnicodeNewline(p))
+      {
+        ++p; ++p;
+        goto case '\n';
+      }
     }

-    // Scan token.
-    t.start = p;
-
     uint c = *p;
     assert(end - p != 0);
     switch (end - p)
@@ -956,23 +915,8 @@
     case toUint!("//"):
       ++p; // Skip /
       assert(*p == '/');
-      while (1)
-      {
-        c = *++p;
-        switch (c)
-        {
-        case '\r', '\n', 0, _Z_:
-          break;
-        case LS[0]:
-          if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-            break;
-        default:
-          if (c & 128)
-            decodeUTF8();
-          continue;
-        }
-        break; // Exit loop.
-      }
+      while (!isEndOfLine(++p))
+        isascii(*p) || decodeUTF8();
       t.type = TOK.Comment;
       t.end = p;
       return;
@@ -1070,7 +1014,7 @@
       do
       {
         c = scanEscapeSequence();
-        if (c < 128)
+        if (isascii(c))
           buffer ~= c;
         else
           encodeUTF8(buffer, c);
@@ -1182,10 +1126,11 @@
       if (c == 'q' && p[1] == '{')
         return scanTokenStringLiteral(t);
     }
+      // Scan identifier.
     Lidentifier:
       do
       { c = *++p; }
-      while (isident(c) || c & 128 && isUniAlpha(decodeUTF8()))
+      while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))

       t.end = p;

@@ -1227,7 +1172,7 @@
       return;
     }

-    if (c & 128)
+    if (!isascii(c))
     {
       c = decodeUTF8();
       if (isUniAlpha(c))
@@ -1246,7 +1191,7 @@
   void scanBlockComment(ref Token t)
   {
     assert(p[-1] == '/' && *p == '*');
-    auto tokenLineNum = loc;
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;
     uint c;
     while (1)
@@ -1259,18 +1204,18 @@
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        ++loc;
+        assert(isNewlineEnd(p));
+        ++lineNum;
         setLineBegin(p+1);
         continue;
       case 0, _Z_:
         error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedBlockComment);
         goto LreturnBC;
       default:
-        if (c & 128)
+        if (!isascii(c))
         {
           c = decodeUTF8();
-          if (c == LSd || c == PSd)
+          if (isUnicodeNewlineChar(c))
             goto case '\n';
           continue;
         }
@@ -1297,7 +1242,7 @@
   void scanNestedComment(ref Token t)
   {
     assert(p[-1] == '/' && *p == '+');
-    auto tokenLineNum = loc;
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;
     uint level = 1;
     uint c;
@@ -1311,18 +1256,18 @@
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        ++loc;
+        assert(isNewlineEnd(p));
+        ++lineNum;
         setLineBegin(p+1);
         continue;
       case 0, _Z_:
         error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedNestedComment);
         goto LreturnNC;
       default:
-        if (c & 128)
+        if (!isascii(c))
         {
           c = decodeUTF8();
-          if (c == LSd || c == PSd)
+          if (isUnicodeNewlineChar(c))
             goto case '\n';
           continue;
         }
@@ -1353,13 +1298,31 @@
     assert(0);
   }

+  char scanPostfix() // Consumes an optional string postfix ('c', 'w' or 'd') at p and returns it; returns 0 and leaves p alone otherwise.
+  {
+    assert(p[-1] == '"' || p[-1] == '`' || // Must be called right after a closing string delimiter.
+      { version(D2) return p[-1] == '}'; // D2 only: a closing '}' is accepted too (presumably the end of a q{ } token string).
+               else return 0; }()
+    );
+    switch (*p)
+    {
+    case 'c':
+    case 'w':
+    case 'd':
+      return *p++; // Return the postfix character and step past it.
+    default:
+      return 0;
+    }
+    assert(0); // Not reached: every switch branch returns.
+  }
+
   void scanNormalStringLiteral(ref Token t)
   {
     assert(*p == '"');
-    auto tokenLineNum = loc;
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;
+    t.type = TOK.String;
     char[] buffer;
-    t.type = TOK.String;
     uint c;
     while (1)
     {
@@ -1368,16 +1331,15 @@
       {
       case '"':
         ++p;
+        t.pf = scanPostfix();
       Lreturn:
-        buffer ~= 0;
-        t.str = buffer;
-        t.pf = scanPostfix();
+        t.str = buffer ~ '\0';
         t.end = p;
         return;
       case '\\':
         c = scanEscapeSequence();
         --p;
-        if (c < 128)
+        if (isascii(c))
           break;
         encodeUTF8(buffer, c);
         continue;
@@ -1385,21 +1347,20 @@
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        ++loc;
-        c = '\n'; // Convert EndOfLine to \n.
+        assert(isNewlineEnd(p));
+        c = '\n'; // Convert Newline to \n.
+        ++lineNum;
         setLineBegin(p+1);
         break;
       case 0, _Z_:
         error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedString);
         goto Lreturn;
       default:
-        if (c & 128)
+        if (!isascii(c))
         {
           c = decodeUTF8();
-          if (c == LSd || c == PSd)
+          if (isUnicodeNewlineChar(c))
             goto case '\n';
-
           encodeUTF8(buffer, c);
           continue;
         }
@@ -1413,41 +1374,32 @@
   void scanCharacterLiteral(ref Token t)
   {
     assert(*p == '\'');
-    MID id = MID.UnterminatedCharacterLiteral;
     ++p;
-    TOK type = TOK.CharLiteral;
+    t.type = TOK.CharLiteral;
     switch (*p)
     {
     case '\\':
       switch (p[1])
       {
       case 'u':
-        type = TOK.WCharLiteral; break;
+        t.type = TOK.WCharLiteral; break;
       case 'U':
-        type = TOK.DCharLiteral; break;
+        t.type = TOK.DCharLiteral; break;
       default:
       }
       t.dchar_ = scanEscapeSequence();
       break;
     case '\'':
-      ++p;
-      id = MID.EmptyCharacterLiteral;
-    // fall through
-    case '\n', '\r', 0, _Z_:
-      goto Lerr;
-    case LS[0]:
-      if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-        goto Lerr;
-    // fall through
+      error(t.start, MID.EmptyCharacterLiteral);
+      break;
     default:
+      if (isEndOfLine(p))
+        break;
       uint c = *p;
-      if (c & 128)
+      if (!isascii(c))
       {
         c = decodeUTF8();
-        if (c <= 0xFFFF)
-          type = TOK.WCharLiteral;
-        else
-          type = TOK.DCharLiteral;
+        t.type = c <= 0xFFFF ? TOK.WCharLiteral : TOK.DCharLiteral;
       }
       t.dchar_ = c;
       ++p;
@@ -1456,33 +1408,17 @@
     if (*p == '\'')
       ++p;
     else
-    Lerr:
-      error(t.start, id);
-    t.type = type;
+      error(t.start, MID.UnterminatedCharacterLiteral);
     t.end = p;
   }

-  char scanPostfix()
-  {
-    switch (*p)
-    {
-    case 'c':
-    case 'w':
-    case 'd':
-      return *p++;
-    default:
-      return 0;
-    }
-    assert(0);
-  }
-
   void scanRawStringLiteral(ref Token t)
   {
-    auto tokenLineNum = loc;
+    assert(*p == '`' || *p == '"' && p[-1] == 'r');
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;
+    t.type = TOK.String;
     uint delim = *p;
-    assert(delim == '`' || delim == '"' && p[-1] == 'r');
-    t.type = TOK.String;
     char[] buffer;
     uint c;
     while (1)
@@ -1494,9 +1430,9 @@
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        c = '\n'; // Convert EndOfLine ('\r','\r\n','\n',LS,PS) to '\n'
-        ++loc;
+        assert(isNewlineEnd(p));
+        c = '\n'; // Convert Newline to '\n'.
+        ++lineNum;
         setLineBegin(p+1);
         break;
       case '`':
@@ -1512,16 +1448,14 @@
         }
         break;
       case 0, _Z_:
-        if (delim == 'r')
-          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedRawString);
-        else
-          error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedBackQuoteString);
+        error(tokenLineNum, tokenLineBegin, t.start,
+          delim == 'r' ? MID.UnterminatedRawString : MID.UnterminatedBackQuoteString);
         goto Lreturn;
       default:
-        if (c & 128)
+        if (!isascii(c))
         {
           c = decodeUTF8();
-          if (c == LSd || c == PSd)
+          if (isUnicodeNewlineChar(c))
             goto case '\n';
           encodeUTF8(buffer, c);
           continue;
@@ -1538,7 +1472,7 @@
     assert(p[0] == 'x' && p[1] == '"');
     t.type = TOK.String;

-    auto tokenLineNum = loc;
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;

     uint c;
@@ -1554,21 +1488,20 @@
       switch (c)
       {
       case '"':
-        ++p;
         if (n & 1)
           error(tokenLineNum, tokenLineBegin, t.start, MID.OddNumberOfDigitsInHexString);
+        ++p;
         t.pf = scanPostfix();
       Lreturn:
-        buffer ~= 0;
-        t.str = cast(string) buffer;
+        t.str = cast(string) (buffer ~= 0);
         t.end = p;
         return;
       case '\r':
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        ++loc;
+        assert(isNewlineEnd(p));
+        ++lineNum;
         setLineBegin(p+1);
         continue;
       default:
@@ -1594,11 +1527,6 @@
         }
         else if (isspace(c))
           continue; // Skip spaces.
-        else if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-        {
-          ++p; ++p;
-          goto case '\n';
-        }
         else if (c == 0 || c == _Z_)
         {
           error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedHexString);
@@ -1608,8 +1536,12 @@
         else
         {
           auto errorAt = p;
-          if (c & 128)
+          if (!isascii(c))
+          {
             c = decodeUTF8();
+            if (isUnicodeNewlineChar(c))
+              goto case '\n';
+          }
           error(errorAt, MID.NonHexCharInHexString, cast(dchar)c);
         }
       }
@@ -1624,7 +1556,7 @@
     assert(p[0] == 'q' && p[1] == '"');
     t.type = TOK.String;

-    auto tokenLineNum = loc;
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;

     char[] buffer;
@@ -1656,32 +1588,30 @@
           if (p[1] == '\n')
             ++p;
         case '\n':
-          assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
+          assert(isNewlineEnd(p));
           ++p;
-          ++loc;
+          ++lineNum;
           setLineBegin(p);
           return '\n';
-        case LS[0]:
-          if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
+        default:
+          if (isUnicodeNewline(p))
           {
             ++p; ++p;
             goto case '\n';
           }
-        default:
         }
         return 0;
       }
-
       // Skip leading newlines:
-      while (scanNewline() != 0){}
-      assert(*p != '\n' && *p != '\r' &&
-             !(*p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])));
+      while (scanNewline() != 0)
+      {}
+      assert(!isNewline(p));

       char* begin = p;
       c = *p;
       closing_delim = c;
       // TODO: Check for non-printable characters?
-      if (c & 128)
+      if (!isascii(c))
       {
         closing_delim = decodeUTF8();
         if (!isUniAlpha(closing_delim))
@@ -1693,7 +1623,7 @@
       // Parse Identifier + EndOfLine
       do
       { c = *++p; }
-      while (isident(c) || c & 128 && isUniAlpha(decodeUTF8()))
+      while (isident(c) || !isascii(c) && isUniAlpha(decodeUTF8()))
       // Store identifier
       str_delim = begin[0..p-begin];
       // Scan newline
@@ -1724,20 +1654,20 @@
         if (p[1] == '\n')
           ++p;
       case '\n':
-        assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]);
-        c = '\n'; // Convert EndOfLine ('\r','\r\n','\n',LS,PS) to '\n'
-        ++loc;
+        assert(isNewlineEnd(p));
+        c = '\n'; // Convert Newline to '\n'.
+        ++lineNum;
         setLineBegin(p+1);
         break;
       case 0, _Z_:
         // TODO: error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedDelimitedString);
         goto Lreturn3;
       default:
-        if (c & 128)
+        if (!isascii(c))
         {
           auto begin = p;
           c = decodeUTF8();
-          if (c == LSd || c == PSd)
+          if (isUnicodeNewlineChar(c))
             goto case '\n';
           if (c == closing_delim)
           {
@@ -1804,14 +1734,14 @@
     assert(p[0] == 'q' && p[1] == '{');
     t.type = TOK.String;

-    auto tokenLineNum = loc;
+    auto tokenLineNum = lineNum;
     auto tokenLineBegin = lineBegin;

     // A guard against changes to particular members:
-    // this.loc_hline and this.errorLoc.filePath
+    // this.lineNum_hline and this.errorPath
     ++inTokenString;

-    uint loc = this.loc;
+    uint lineNum = this.lineNum;
     uint level = 1;

     ++p; ++p; // Skip q{
@@ -1868,10 +1798,10 @@
       t.end = p;
       buffer = t.srcText[2..$-1].dup ~ '\0';
       t.pf = scanPostfix();
-      t.end = p;
+      t.end = p; // Assign again because of postfix.
     }
-    // Convert EndOfLines to '\n'
-    if (loc != this.loc)
+    // Convert newlines to '\n'.
+    if (lineNum != this.lineNum)
     {
       assert(buffer[$-1] == '\0');
       uint i, j;
@@ -1882,20 +1812,18 @@
           if (buffer[i+1] == '\n')
             ++i;
         case '\n':
-          buffer[j++] = '\n';
+          assert(isNewlineEnd(buffer.ptr + i));
+          buffer[j++] = '\n'; // Convert Newline to '\n'.
           break;
-        case LS[0]:
-          auto b = buffer[i..$];
-          if (b[1] == LS[1] && (b[2] == LS[2] || b[2] == PS[2]))
+        default:
+          if (isUnicodeNewline(buffer.ptr + i))
           {
             ++i; ++i;
             goto case '\n';
           }
-          // goto default;
-        default:
-          buffer[j++] = buffer[i]; // Copy character
+          buffer[j++] = buffer[i]; // Copy.
         }
-      buffer.length = j; // Adjust length
+      buffer.length = j; // Adjust length.
     }
     assert(buffer[$-1] == '\0');
     t.str = buffer;
@@ -2004,22 +1932,16 @@
         else
           error(sequenceStart, MID.InvalidBeginHTMLEntity);
       }
-      else if (*p == '\n' || *p == '\r' ||
-               *p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-      {
-        error(sequenceStart, MID.UndefinedEscapeSequence, r"\NewLine");
-      }
-      else if (*p == 0 || *p == _Z_)
-      {
-        error(sequenceStart, MID.UndefinedEscapeSequence, r"\EOF");
-      }
+      else if (isEndOfLine(p))
+        error(sequenceStart, MID.UndefinedEscapeSequence,
+          (*p == 0 || *p == _Z_) ? `\EOF` : `\NewLine`);
       else
       {
         char[] str = `\`;
-        if (*p & 128)
+        if (isascii(c))
+          str ~= *p;
+        else
           encodeUTF8(str, decodeUTF8());
-        else
-          str ~= *p;
         ++p;
         // TODO: check for unprintable character?
         error(sequenceStart, MID.UndefinedEscapeSequence, str);
@@ -2479,85 +2401,63 @@

     State state = State.Integer;

-  Loop:
-    while (1)
+    while (!isEndOfLine(++p))
     {
-      switch (*++p)
+      if (isspace(*p))
+        continue;
+      if (state == State.Integer)
       {
-      case LS[0]:
-        if (!(p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])))
-          goto default;
-      case '\r', '\n', 0, _Z_:
-        break Loop;
-      default:
-        if (isspace(*p))
-          continue;
-        if (state == State.Integer)
+        if (!isdigit(*p))
+        {
+          errorAtColumn = p;
+          mid = MID.ExpectedIntegerAfterSTLine;
+          goto Lerr;
+        }
+        t.tokLineNum = new Token;
+        scan(*t.tokLineNum);
+        if (t.tokLineNum.type != TOK.Int32 && t.tokLineNum.type != TOK.Uint32)
         {
-          if (!isdigit(*p))
+          errorAtColumn = t.tokLineNum.start;
+          mid = MID.ExpectedIntegerAfterSTLine;
+          goto Lerr;
+        }
+        --p; // Go one back because scan() advanced p past the integer.
+        state = State.Filespec;
+      }
+      else if (state == State.Filespec)
+      {
+        if (*p != '"')
+        {
+          errorAtColumn = p;
+          mid = MID.ExpectedFilespec;
+          goto Lerr;
+        }
+        t.tokLineFilespec = new Token;
+        t.tokLineFilespec.start = p;
+        t.tokLineFilespec.type = TOK.Filespec;
+        while (*++p != '"')
+        {
+          if (isEndOfLine(p))
           {
-            errorAtColumn = p;
-            mid = MID.ExpectedIntegerAfterSTLine;
-            goto Lerr;
-          }
-          t.line_num = new Token;
-          scan(*t.line_num);
-          if (t.line_num.type != TOK.Int32 && t.line_num.type != TOK.Uint32)
-          {
-            errorAtColumn = t.line_num.start;
-            mid = MID.ExpectedIntegerAfterSTLine;
-            goto Lerr;
-          }
-          --p; // Go one back because scan() advanced p past the integer.
-          state = State.Filespec;
-        }
-        else if (state == State.Filespec)
-        {
-          if (*p != '"')
-          {
-            errorAtColumn = p;
-            mid = MID.ExpectedFilespec;
+            errorAtColumn = t.tokLineFilespec.start;
+            mid = MID.UnterminatedFilespec;
+            t.tokLineFilespec.end = p;
             goto Lerr;
           }
-          t.line_filespec = new Token;
-          t.line_filespec.start = p;
-          t.line_filespec.type = TOK.Filespec;
-          while (1)
-          {
-            switch (*++p)
-            {
-            case '"':
-              break;
-            case LS[0]:
-              if (!(p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])))
-                goto default;
-            case '\r', '\n', 0, _Z_:
-              errorAtColumn = t.line_filespec.start;
-              mid = MID.UnterminatedFilespec;
-              t.line_filespec.end = p;
-              goto Lerr;
-            default:
-              if (*p & 128)
-                decodeUTF8();
-              continue;
-            }
-            break; // Exit loop.
-          }
-          auto start = t.line_filespec.start +1; // +1 skips '"'
-          t.line_filespec.str = start[0 .. p - start];
-          t.line_filespec.end = p + 1;
-          state = State.End;
+          isascii(*p) || decodeUTF8();
         }
-        else/+ if (state == State.End)+/
-        {
-          mid = MID.UnterminatedSpecialToken;
-          goto Lerr;
-        }
+        auto start = t.tokLineFilespec.start +1; // +1 skips '"'
+        t.tokLineFilespec.str = start[0 .. p - start];
+        t.tokLineFilespec.end = p + 1;
+        state = State.End;
+      }
+      else/+ if (state == State.End)+/
+      {
+        mid = MID.UnterminatedSpecialToken;
+        goto Lerr;
       }
     }
-    assert(*p == '\r' || *p == '\n' || *p == 0 || *p == _Z_ ||
-           *p == LS[0] && (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
-    );
+    assert(isEndOfLine(p));

     if (state == State.Integer)
     {
@@ -2567,8 +2467,12 @@
     }

     // Evaluate #line only when not in token string.
-    if (!inTokenString)
-      evaluateHashLine(t);
+    if (!inTokenString && t.tokLineNum)
+    {
+      this.lineNum_hline = this.lineNum - t.tokLineNum.uint_ + 1;
+      if (t.tokLineFilespec)
+        this.errorPath = t.tokLineFilespec.str;
+    }
     t.end = p;

     return;
@@ -2577,18 +2481,7 @@
     error(errorAtColumn, mid);
   }

-  void evaluateHashLine(ref Token t)
-  {
-    assert(t.type == TOK.HashLine);
-    if (t.line_num)
-    {
-      this.loc_hline = this.loc - t.line_num.uint_ + 1;
-      if (t.line_filespec)
-        this.errorLoc.setFilePath(t.line_filespec.str);
-    }
-  }
-
-  /+
+  /++
     Insert an empty dummy token before t.
     Useful in the parsing phase for representing a node in the AST
     that doesn't consume an actual token from the source text.
@@ -2611,31 +2504,28 @@
     return new_t;
   }

-  void updateErrorLoc(char* columnPos)
-  {
-    updateErrorLoc(this.loc, this.lineBegin, columnPos);
-  }
-
-  void updateErrorLoc(uint lineNum, char* lineBegin, char* columnPos)
+  uint errorLineNumber(uint lineNum)
   {
-    errorLoc.set(this.errorLineNum(lineNum), lineBegin, columnPos);
-  }
-
-  uint errorLineNum(uint loc)
-  {
-    return loc - this.loc_hline;
+    return lineNum - this.lineNum_hline;
   }

   void error(char* columnPos, MID mid, ...)
   {
-    updateErrorLoc(columnPos);
-    errors ~= new Information(InfoType.Lexer, mid, errorLoc.clone, Format(_arguments, _argptr, GetMsg(mid)));
+    error_(this.lineNum, this.lineBegin, columnPos, mid, _arguments, _argptr);
   }

   void error(uint lineNum, char* lineBegin, char* columnPos, MID mid, ...)
   {
-    updateErrorLoc(lineNum, lineBegin, columnPos);
-    errors ~= new Information(InfoType.Lexer, mid, errorLoc.clone, Format(_arguments, _argptr, GetMsg(mid)));
+    error_(lineNum, lineBegin, columnPos, mid, _arguments, _argptr);
+  }
+
+  void error_(uint lineNum, char* lineBegin, char* columnPos, MID mid,
+              TypeInfo[] _arguments, void* _argptr) // Shared body of both error() overloads.
+  {
+    lineNum = this.errorLineNumber(lineNum); // Subtract the lineNum_hline offset.
+    auto location = new Location(errorPath, lineNum, lineBegin, columnPos);
+    auto msg = Format(_arguments, _argptr, GetMsg(mid)); // Uses the forwarded variadic arguments.
+    errors ~= new Information(InfoType.Lexer, mid, location, msg); // Append to the error list.
+  }

   Token* getTokens()
@@ -2671,8 +2561,7 @@

     try
     {
-      if (isidbeg(ident[0]) ||
-          ident[0] & 128 && isFirstCharUniAlpha())
+      if (isidbeg(ident[0]) || !isascii(ident[0]) && isFirstCharUniAlpha())
       {
         foreach (dchar c; ident[idx..$])
           if (!isident(c) && !isUniAlpha(c))
@@ -2872,11 +2761,12 @@
   Stdout("Testing Lexer.\n");
   struct Pair
   {
-    char[] token;
+    char[] tokenText;
     TOK type;
   }
   static Pair[] pairs = [
-    {"//çay\n", TOK.Comment},       {"&",       TOK.AndBinary},
+    {"//çay",   TOK.Comment},       {"\n",      TOK.Newline},
+                                    {"&",       TOK.AndBinary},
     {"/*çağ*/", TOK.Comment},       {"&&",      TOK.AndLogical},
     {"/+çak+/", TOK.Comment},       {"&=",      TOK.AndAssign},
     {">",       TOK.Greater},       {"+",       TOK.Plus},
@@ -2908,29 +2798,34 @@
     {"?",       TOK.Question},      {",",       TOK.Comma},
     {"$",       TOK.Dollar},        {"cam",     TOK.Identifier},
     {"çay",     TOK.Identifier},    {".0",      TOK.Float64},
-    {"0",       TOK.Int32},
+    {"0",       TOK.Int32},         {"\n",      TOK.Newline},
+    {"\r",      TOK.Newline},       {"\r\n",    TOK.Newline},
+    {"\u2028",  TOK.Newline},       {"\u2029",  TOK.Newline}
   ];

   char[] src;

-  foreach (pair; pairs)
-    src ~= pair.token ~ " ";
-
-  assert(pairs[0].token == "//çay\n");
-  // Remove \n after src has been constructed.
-  // It won't be part of the scanned token string.
-  pairs[0].token = "//çay";
+  // Join all token texts into a single string.
+  foreach (i, pair; pairs)
+    if (pair.type == TOK.Comment && pair.tokenText[1] == '/') // Line comment.
+    {
+      assert(pairs[i+1].type == TOK.Newline); // Must be followed by a newline.
+      src ~= pair.tokenText;
+    }
+    else
+      src ~= pair.tokenText ~ " ";

   auto lx = new Lexer(src, "");
   auto token = lx.getTokens();

   uint i;
   assert(token == lx.head);
-  token = token.next;
+  assert(token.next.type == TOK.Newline);
+  token = token.next.next;
   do
   {
     assert(i < pairs.length);
-    assert(token.srcText == pairs[i].token, Format("Scanned '{0}' but expected '{1}'", token.srcText, pairs[i].token));
+    assert(token.srcText == pairs[i].tokenText, Format("Scanned '{0}' but expected '{1}'", token.srcText, pairs[i].tokenText));
     ++i;
     token = token.next;
   } while (token.type != TOK.EOF)
@@ -2942,7 +2837,9 @@
   string sourceText = "unittest { }";
   auto lx = new Lexer(sourceText, null);

-  Token* next = lx.head;
+  auto next = lx.head;
+  lx.peek(next);
+  assert(next.type == TOK.Newline);
   lx.peek(next);
   assert(next.type == TOK.Unittest);
   lx.peek(next);
@@ -2951,6 +2848,13 @@
   assert(next.type == TOK.RBrace);
   lx.peek(next);
   assert(next.type == TOK.EOF);
+
+  lx = new Lexer("", null);
+  next = lx.head;
+  lx.peek(next);
+  assert(next.type == TOK.Newline);
+  lx.peek(next);
+  assert(next.type == TOK.EOF);
 }

 unittest
--- a/trunk/src/dil/Parser.d	Mon Nov 26 20:18:54 2007 +0100
+++ b/trunk/src/dil/Parser.d	Fri Nov 30 20:17:29 2007 +0100
@@ -109,19 +109,17 @@
   +/
   ReturnType try_(ReturnType)(ReturnType delegate() parseMethod, out bool success)
   {
-    ++trying;
     auto oldToken     = this.token;
     auto oldPrevToken = this.prevToken;
     auto oldCount     = this.errorCount;
-    auto lexerState   = this.lx.getState();
-
+
+    ++trying;
     auto result = parseMethod();
-
+    --trying;
     // Check if an error occurred.
     if (errorCount != oldCount)
     {
       // Restore members.
-      lx.restoreState(lexerState);
       token      = oldToken;
       prevToken  = oldPrevToken;
       lx.token   = oldToken;
@@ -130,7 +128,6 @@
     }
     else
       success = true;
-    --trying;
     return result;
   }

@@ -144,7 +141,7 @@
   }

   /++
-    Returns true if set() has been called on a node, or false otherwise.
+    Returns true if set() has been called on a node.
   +/
   bool isNodeSet(Node node)
   {
@@ -153,23 +150,19 @@

   TOK peekNext()
   {
-    auto state = lx.getState();
     Token* next = token;
     do
       lx.peek(next);
     while (next.isWhitespace) // Skip whitespace
-    lx.restoreState(state);
     return next.type;
   }

   TOK peekAfter(ref Token* next)
   {
     assert(next !is null);
-    auto state = lx.getState();
     do
       lx.peek(next);
     while (next.isWhitespace) // Skip whitespace
-    lx.restoreState(state);
     return next.type;
   }

@@ -190,6 +183,11 @@
     return set(new ModuleDeclaration(moduleFQN), begin);
   }

+  /++
+    DeclDefs:
+      DeclDef
+      DeclDef DeclDefs
+  +/
   Declarations parseDeclarationDefinitions()
   {
     auto decls = new Declarations;
@@ -198,11 +196,11 @@
     return decls;
   }

-  /*
+  /++
     DeclDefsBlock:
         { }
         { DeclDefs }
-  */
+  +/
   Declarations parseDeclarationDefinitionsBlock()
   {
     auto begin = token;
@@ -211,7 +209,7 @@
     while (token.type != T.RBrace && token.type != T.EOF)
       decls ~= parseDeclarationDefinition();
     require(T.RBrace);
-    return set(decls, token);
+    return set(decls, begin);
   }

   Declaration parseDeclarationDefinition()
@@ -3858,7 +3856,6 @@
     assert(token.type == T.LParen);
     Token* next = token;
     uint level = 1;
-    auto state = lx.getState();
   Loop:
     while (1)
     {
@@ -3882,7 +3879,6 @@
       default:
       }
     }
-    lx.restoreState(state);
     return next.type == tok;
   }

@@ -4378,7 +4374,8 @@
       ++errorCount;
       return;
     }
-    lx.updateErrorLoc(token.start);
-    errors ~= new Information(InfoType.Parser, mid, lx.errorLoc.clone, Format(_arguments, _argptr, GetMsg(mid)));
+    auto location = Lexer.getLocation(this.token);
+    auto msg = Format(_arguments, _argptr, GetMsg(mid));
+    errors ~= new Information(InfoType.Parser, mid, location, msg);
   }
 }
--- a/trunk/src/dil/Token.d	Mon Nov 26 20:18:54 2007 +0100
+++ b/trunk/src/dil/Token.d	Fri Nov 30 20:17:29 2007 +0100
@@ -7,12 +7,6 @@
 import tango.stdc.stdlib : malloc, free;
 import tango.core.Exception;

-struct Position
-{
-  size_t loc;
-  size_t col;
-}
-
 enum TOK : ushort
 {
   Invalid,
@@ -24,9 +18,10 @@
   Shebang  = 3 | Whitespace,
   HashLine = 4 | Whitespace,
   Filespec = 5 | Whitespace,
-  Empty    = 6,
+  Newline  = 6 | Whitespace,
+  Empty    = 7,

-  Identifier = 7,
+  Identifier = 8,
   String,
   CharLiteral, WCharLiteral, DCharLiteral,

@@ -119,9 +114,8 @@

 struct Token
 {
-  TOK type;
-//   Position pos;
-
+  TOK type; /// The type of the token.
+  /// Pointers to the next and previous tokens (doubly-linked list).
   Token* next, prev;

   char* ws;    /// Start of whitespace characters before token. Null if no WS.
@@ -130,15 +124,24 @@

   union
   {
+    /// For newline tokens.
     struct
     {
-      Token* line_num; // #line number
-      Token* line_filespec; // #line number filespec
+      char[] filePath;
+      uint lineNum;
+      uint lineNum_hline;
     }
+    /// For #line tokens.
+    struct
+    {
+      Token* tokLineNum; /// #line number
+      Token* tokLineFilespec; /// #line number filespec
+    }
+    /// For string tokens.
     struct
     {
       string str;
-      char pf; /// Postfix 'c', 'w' or 'd'
+      char pf; /// Postfix 'c', 'w', 'd' or 0 for none.
     version(D2)
       Token* tok_str; /// Points to the contents of a token string stored as a
                       /// doubly linked list. The last token is always '}' or
@@ -199,16 +202,30 @@
     return tokToString[tok];
   }

+  /++
+    Returns true if this is a token which can have newlines in it.
+    These are string literals (except escape string literals), and
+    block and nested comments (i.e. every comment but a line comment).
+  +/
+  bool isMultiline()
+  {
+    return type == TOK.String && start[0] != '\\' || // Not an escape string.
+           type == TOK.Comment && start[1] != '/';   // Not a "//" comment.
+  }
+
+  /// Returns true if this is a keyword token.
   bool isKeyword()
   {
     return KeywordsBegin <= type && type <= KeywordsEnd;
   }

+  /// Returns true if this is a whitespace token.
   bool isWhitespace()
   {
     return !!(type & TOK.Whitespace);
   }

+  /// Returns true if this is a special token.
   bool isSpecialToken()
   {
     return *start == '_' && type != TOK.Identifier;
@@ -216,6 +233,7 @@

 version(D2)
 {
+  /// Returns true if this is a token string literal.
   bool isTokenStringLiteral()
   {
     return type == TOK.String && tok_str !is null;
@@ -256,8 +274,8 @@
   void destructHashLineToken()
   {
     assert(type == TOK.HashLine);
-    delete line_num;
-    delete line_filespec;
+    delete tokLineNum;
+    delete tokLineFilespec;
   }

 version(D2)
@@ -280,7 +298,8 @@
 }
 }

-const string[] tokToString = [
+/// A table mapping each TOK to a string.
+private const string[] tokToString = [
   "Invalid",

   "Illegal",
@@ -288,6 +307,7 @@
   "#! /shebang/",
   "#line",
   `"filespec"`,
+  "Newline",
   "Empty",

   "Identifier",
@@ -315,14 +335,14 @@

   ".", "..", "...",

-  "Unordered",
-  "UorE",
-  "UorG",
-  "UorGorE",
-  "UorL",
-  "UorLorE",
-  "LorEorG",
-  "LorG",
+  "!<>=", // Unordered
+  "!<>",  // UorE
+  "!<=",  // UorG
+  "!<",   // UorGorE
+  "!>=",  // UorL
+  "!>",   // UorLorE
+  "<>=",  // LorEorG
+  "<>",   // LorG

   "=", "==", "!=", "!",
   "<=", "<",
@@ -366,4 +386,4 @@
   "HEAD",
   "EOF"
 ];
-static assert(tokToString.length == TOK.MAX);
\ No newline at end of file
+static assert(tokToString.length == TOK.MAX);
--- a/trunk/src/main.d	Mon Nov 26 20:18:54 2007 +0100
+++ b/trunk/src/main.d	Fri Nov 30 20:17:29 2007 +0100
@@ -156,7 +156,7 @@
   foreach(decl; decls)
   {
     assert(decl !is null);
-//     writefln(indent, decl.classinfo.name, ": begin=%s end=%s", decl.begin ? decl.begin.srcText : "\33[31mnull\33[0m", decl.end ? decl.end.srcText : "\33[31mnull\33[0m");
+    Stdout.formatln("{}{}: begin={} end={}", indent, decl.classinfo.name, decl.begin ? decl.begin.srcText : "\33[31mnull\33[0m", decl.end ? decl.end.srcText : "\33[31mnull\33[0m");
     print(decl.children, indent ~ "  ");
   }
 }